hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From st...@apache.org
Subject svn commit: r1204769 - in /hbase/trunk/src: main/java/org/apache/hadoop/hbase/master/ main/java/org/apache/hadoop/hbase/regionserver/ main/java/org/apache/hadoop/hbase/util/ main/java/org/apache/hadoop/hbase/zookeeper/ test/java/org/apache/hadoop/hbase...
Date Tue, 22 Nov 2011 01:04:41 GMT
Author: stack
Date: Tue Nov 22 01:04:40 2011
New Revision: 1204769

URL: http://svn.apache.org/viewvc?rev=1204769&view=rev
Log:
HBASE-4798 Sleeps and synchronisation improvements for tests

Modified:
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/util/JVMClusterUtil.java
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/util/Sleeper.java
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperNodeTracker.java
    hbase/trunk/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
    hbase/trunk/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java
    hbase/trunk/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java
    hbase/trunk/src/test/java/org/apache/hadoop/hbase/catalog/TestCatalogTrackerOnCluster.java
    hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestRestartCluster.java
    hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestHLog.java
    hbase/trunk/src/test/java/org/apache/hadoop/hbase/util/TestFSTableDescriptors.java

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java?rev=1204769&r1=1204768&r2=1204769&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java Tue Nov 22 01:04:40 2011
@@ -409,7 +409,8 @@ public class AssignmentManager extends Z
     synchronized(this.regionsInTransition) {
       while (!this.master.isStopped() &&
           this.regionsInTransition.containsKey(hri.getEncodedName())) {
-        this.regionsInTransition.wait();
+        // We expect a notify, but to be safe we set a timeout
+        this.regionsInTransition.wait(100);
       }
     }
     return intransistion;
@@ -1824,7 +1825,10 @@ public class AssignmentManager extends Z
   throws InterruptedException {
     synchronized(regions) {
       while(!regions.containsKey(regionInfo)) {
-        regions.wait();
+        // We should receive a notification, but it's
+        //  better to have a timeout to recheck the condition here:
+        //  it lowers the impact of a race condition if any
+        regions.wait(100);
       }
     }
   }

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMaster.java?rev=1204769&r1=1204768&r2=1204769&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMaster.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMaster.java Tue Nov 22 01:04:40 2011
@@ -420,11 +420,11 @@ implements HMasterInterface, HMasterRegi
         ", cluster-up flag was=" + wasUp);
   }
 
+  // Check if we should stop every second.
+  private Sleeper stopSleeper = new Sleeper(1000, this);
   private void loop() {
-    // Check if we should stop every second.
-    Sleeper sleeper = new Sleeper(1000, this);
     while (!this.stopped) {
-      sleeper.sleep();
+      stopSleeper.sleep();
     }
   }
 
@@ -1504,6 +1504,8 @@ implements HMasterInterface, HMasterRegi
   public void stop(final String why) {
     LOG.info(why);
     this.stopped = true;
+    // We wake up the stopSleeper to stop immediately
+    stopSleeper.skipSleepCycle();
     // If we are a backup master, we need to interrupt wait
     if (this.activeMasterManager != null) {
       synchronized (this.activeMasterManager.clusterHasActiveMaster) {

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java?rev=1204769&r1=1204768&r2=1204769&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java Tue Nov 22 01:04:40 2011
@@ -967,6 +967,8 @@ public class HRegionServer implements HR
       stop("Failed initialization");
       throw convertThrowableToIOE(cleanup(e, "Failed init"),
           "Region server startup failed");
+    } finally {
+      sleeper.skipSleepCycle();
     }
   }
 
@@ -1552,9 +1554,14 @@ public class HRegionServer implements HR
   public void stop(final String msg) {
     this.stopped = true;
     LOG.info("STOPPED: " + msg);
-    synchronized (this) {
-      // Wakes run() if it is sleeping
-      notifyAll(); // FindBugs NN_NAKED_NOTIFY
+    // Wakes run() if it is sleeping
+    //sleeper.skipSleepCycle();
+    //will be uncommented later, see discussion in jira 4798
+  }
+
+  public void waitForServerOnline(){
+    while (!isOnline() && !isStopped()){
+       sleeper.sleep();
     }
   }
 
@@ -1722,10 +1729,17 @@ public class HRegionServer implements HR
    */
   private ServerName getMaster() {
     ServerName masterServerName = null;
+    long previousLogTime = 0;
     while ((masterServerName = this.masterAddressManager.getMasterAddress()) == null) {
       if (!keepLooping()) return null;
-      LOG.debug("No master found; retry");
-      sleeper.sleep();
+      if (System.currentTimeMillis() > (previousLogTime+1000)){
+        LOG.debug("No master found; retry");
+        previousLogTime = System.currentTimeMillis();
+      }
+      try {
+        Thread.sleep(100);
+      } catch (InterruptedException ignored) {
+      }
     }
     InetSocketAddress isa =
       new InetSocketAddress(masterServerName.getHostname(), masterServerName.getPort());
@@ -1744,11 +1758,20 @@ public class HRegionServer implements HR
         e = e instanceof RemoteException ?
             ((RemoteException)e).unwrapRemoteException() : e;
         if (e instanceof ServerNotRunningYetException) {
-          LOG.info("Master isn't available yet, retrying");
+          if (System.currentTimeMillis() > (previousLogTime+1000)){
+            LOG.info("Master isn't available yet, retrying");
+            previousLogTime = System.currentTimeMillis();
+          }
         } else {
-          LOG.warn("Unable to connect to master. Retrying. Error was:", e);
+          if (System.currentTimeMillis() > (previousLogTime + 1000)) {
+            LOG.warn("Unable to connect to master. Retrying. Error was:", e);
+            previousLogTime = System.currentTimeMillis();
+          }
+        }
+        try {
+          Thread.sleep(200);
+        } catch (InterruptedException ignored) {
         }
-        sleeper.sleep();
       }
     }
     LOG.info("Connected to master at " + isa);

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/util/JVMClusterUtil.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/util/JVMClusterUtil.java?rev=1204769&r1=1204768&r2=1204769&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/util/JVMClusterUtil.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/util/JVMClusterUtil.java Tue Nov 22 01:04:40 2011
@@ -62,14 +62,7 @@ public class JVMClusterUtil {
       // the HRS#run method.  HRS#init can fail for whatever region.  In those
       // cases, we'll jump out of the run without setting online flag.  Check
       // stopRequested so we don't wait here a flag that will never be flipped.
-      while (!this.regionServer.isOnline() &&
-          !this.regionServer.isStopped()) {
-        try {
-          Thread.sleep(1000);
-        } catch (InterruptedException e) {
-          // continue waiting
-        }
-      }
+      regionServer.waitForServerOnline();
     }
   }
 
@@ -118,22 +111,6 @@ public class JVMClusterUtil {
     public HMaster getMaster() {
       return this.master;
     }
-
-    /**
-     * Block until the master has come online, indicating it is ready
-     * to be used.
-     */
-    public void waitForServerOnline() {
-      // The server is marked online after init begins but before race to become
-      // the active master.
-      while (!this.master.isMasterRunning() && !this.master.isStopped()) {
-        try {
-          Thread.sleep(1000);
-        } catch (InterruptedException e) {
-          // continue waiting
-        }
-      }
-    }
   }
 
   /**
@@ -165,20 +142,49 @@ public class JVMClusterUtil {
     return new JVMClusterUtil.MasterThread(server, index);
   }
 
+  private static JVMClusterUtil.MasterThread findActiveMaster(
+    List<JVMClusterUtil.MasterThread> masters) {
+    for (JVMClusterUtil.MasterThread t : masters) {
+      if (t.master.isActiveMaster()) {
+        return t;
+      }
+    }
+
+    return null;
+  }
+
   /**
-   * Start the cluster.  Waits until there is a primary master and returns its
-   * address.
+   * Start the cluster.  Waits until there is a primary master initialized
+   * and returns its address.
    * @param masters
    * @param regionservers
    * @return Address to use contacting primary master.
    */
   public static String startup(final List<JVMClusterUtil.MasterThread> masters,
       final List<JVMClusterUtil.RegionServerThread> regionservers) throws IOException {
-    if (masters != null) {
-      for (JVMClusterUtil.MasterThread t : masters) {
-        t.start();
+
+    if (masters == null || masters.isEmpty()) {
+      return null;
+    }
+
+    for (JVMClusterUtil.MasterThread t : masters) {
+      t.start();
+    }
+
+    // Wait for an active master
+    //  having an active master before starting the region threads allows
+    //  them to succeed on their connection to master
+    long startTime = System.currentTimeMillis();
+    while (findActiveMaster(masters) == null) {
+      try {
+        Thread.sleep(100);
+      } catch (InterruptedException ignored) {
+      }
+      if (System.currentTimeMillis() > startTime + 30000) {
+        throw new RuntimeException("Master not active after 30 seconds");
       }
     }
+
     if (regionservers != null) {
       for (JVMClusterUtil.RegionServerThread t: regionservers) {
         HRegionServer hrs = t.getRegionServer();
@@ -187,19 +193,21 @@ public class JVMClusterUtil {
         t.start();
       }
     }
-    if (masters == null || masters.isEmpty()) {
-      return null;
-    }
-    // Wait for an active master
+
+    // Wait for an active master to be initialized (implies being master)
+    //  with this, when we return the cluster is complete
+    startTime = System.currentTimeMillis();
     while (true) {
-      for (JVMClusterUtil.MasterThread t : masters) {
-        if (t.master.isActiveMaster()) {
-          return t.master.getServerName().toString();
-        }
+      JVMClusterUtil.MasterThread t = findActiveMaster(masters);
+      if (t != null && t.master.isInitialized()) {
+        return t.master.getServerName().toString();
+      }
+      if (System.currentTimeMillis() > startTime + 200000) {
+        throw new RuntimeException("Master not initialized after 200 seconds");
       }
       try {
-        Thread.sleep(1000);
-      } catch(InterruptedException e) {
+        Thread.sleep(100);
+      } catch (InterruptedException ignored) {
         // Keep waiting
       }
     }

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/util/Sleeper.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/util/Sleeper.java?rev=1204769&r1=1204768&r2=1204769&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/util/Sleeper.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/util/Sleeper.java Tue Nov 22 01:04:40 2011
@@ -62,7 +62,7 @@ public class Sleeper {
   public void skipSleepCycle() {
     synchronized (sleepLock) {
       triggerWake = true;
-      sleepLock.notify();
+      sleepLock.notifyAll();
     }
   }
 

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperNodeTracker.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperNodeTracker.java?rev=1204769&r1=1204768&r2=1204769&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperNodeTracker.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperNodeTracker.java Tue Nov 22 01:04:40 2011
@@ -127,11 +127,9 @@ public abstract class ZooKeeperNodeTrack
       }
     }
     while (!this.stopped && (notimeout || remaining > 0) && this.data == null) {
-      if (notimeout) {
-        wait();
-        continue;
-      }
-      wait(remaining);
+      // We expect a notification; but we wait with a
+      //  timeout to lower the impact of a race condition if any
+      wait(100);
       remaining = timeout - (System.currentTimeMillis() - startTime);
     }
     return this.data;

Modified: hbase/trunk/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java?rev=1204769&r1=1204768&r2=1204769&view=diff
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java (original)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java Tue Nov 22 01:04:40 2011
@@ -1245,7 +1245,7 @@ public class HBaseTestingUtility {
       boolean checkStatus) throws Exception {
     Configuration c = new Configuration(this.conf);
     String quorumServers = ZKConfig.getZKQuorumServersString(c);
-    int sessionTimeout = 5 * 1000; // 5 seconds
+    int sessionTimeout = 500;
     ZooKeeper zk = nodeZK.getRecoverableZooKeeper().getZooKeeper();
     byte[] password = zk.getSessionPasswd();
     long sessionID = zk.getSessionId();
@@ -1253,7 +1253,7 @@ public class HBaseTestingUtility {
     ZooKeeper newZK = new ZooKeeper(quorumServers,
         sessionTimeout, EmptyWatcher.instance, sessionID, password);
     newZK.close();
-    final long sleep = sessionTimeout * 5L;
+    final long sleep = 7000; // 7s seems enough to manage the timeout
     LOG.info("ZK Closed Session 0x" + Long.toHexString(sessionID) +
       "; sleeping=" + sleep);
 

Modified: hbase/trunk/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java?rev=1204769&r1=1204768&r2=1204769&view=diff
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java (original)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java Tue Nov 22 01:04:40 2011
@@ -293,7 +293,6 @@ public class MiniHBaseCluster {
     try {
       t = hbaseCluster.addMaster(c, hbaseCluster.getMasters().size(), user);
       t.start();
-      t.waitForServerOnline();
     } catch (InterruptedException ie) {
       throw new IOException("Interrupted adding master to cluster", ie);
     }
@@ -382,7 +381,7 @@ public class MiniHBaseCluster {
           return true;
         }
       }
-      Thread.sleep(200);
+      Thread.sleep(100);
     }
     return false;
   }

Modified: hbase/trunk/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java?rev=1204769&r1=1204768&r2=1204769&view=diff
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java (original)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/TestRegionRebalancing.java Tue Nov 22 01:04:40 2011
@@ -212,7 +212,7 @@ public class TestRegionRebalancing {
     // while (!cluster.getMaster().allRegionsAssigned()) {
       LOG.debug("Waiting for there to be 22 regions, but there are " + getRegionCount() + " right now.");
       try {
-        Thread.sleep(1000);
+        Thread.sleep(200);
       } catch (InterruptedException e) {}
     }
   }

Modified: hbase/trunk/src/test/java/org/apache/hadoop/hbase/catalog/TestCatalogTrackerOnCluster.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/catalog/TestCatalogTrackerOnCluster.java?rev=1204769&r1=1204768&r2=1204769&view=diff
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/catalog/TestCatalogTrackerOnCluster.java (original)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/catalog/TestCatalogTrackerOnCluster.java Tue Nov 22 01:04:40 2011
@@ -23,6 +23,7 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hbase.*;
 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
+import org.apache.zookeeper.KeeperException;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 
@@ -37,7 +38,7 @@ public class TestCatalogTrackerOnCluster
 
   /**
    * @throws Exception 
-   * @see https://issues.apache.org/jira/browse/HBASE-3445
+   * @see {https://issues.apache.org/jira/browse/HBASE-3445}
    */
   @Test public void testBadOriginalRootLocation() throws Exception {
     UTIL.getConfiguration().setInt("ipc.socket.timeout", 3000);
@@ -61,9 +62,15 @@ public class TestCatalogTrackerOnCluster
     ServerName nonsense =
       new ServerName("example.org", 1234, System.currentTimeMillis());
     RootLocationEditor.setRootLocation(zookeeper, nonsense);
+
     // Bring back up the hbase cluster.  See if it can deal with nonsense root
-    // location.
+    // location. The cluster should start and be fully available.
     UTIL.startMiniHBaseCluster(1, 1);
+
+    // if we can create a table, it's a good sign that it's working
+    UTIL.createTable(
+      getClass().getSimpleName().getBytes(), "family".getBytes());
+
     UTIL.shutdownMiniCluster();
   }
 }

Modified: hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestRestartCluster.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestRestartCluster.java?rev=1204769&r1=1204768&r2=1204769&view=diff
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestRestartCluster.java (original)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestRestartCluster.java Tue Nov 22 01:04:40 2011
@@ -94,8 +94,11 @@ public class TestRestartCluster {
     LOG.info("\n\nCreating tables");
     for(byte [] TABLE : TABLES) {
       UTIL.createTable(TABLE, FAMILY);
+    }
+    for(byte [] TABLE : TABLES) {
       UTIL.waitTableAvailable(TABLE, 30000);
     }
+
     List<HRegionInfo> allRegions =
       MetaScanner.listAllRegions(UTIL.getConfiguration());
     assertEquals(3, allRegions.size());

Modified: hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestHLog.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestHLog.java?rev=1204769&r1=1204768&r2=1204769&view=diff
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestHLog.java (original)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestHLog.java Tue Nov 22 01:04:40 2011
@@ -127,7 +127,7 @@ public class TestHLog  {
   }
   @AfterClass
   public static void tearDownAfterClass() throws Exception {
-    TEST_UTIL.shutdownMiniDFSCluster();
+    TEST_UTIL.shutdownMiniCluster();
   }
 
   private static String getName() {

Modified: hbase/trunk/src/test/java/org/apache/hadoop/hbase/util/TestFSTableDescriptors.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/util/TestFSTableDescriptors.java?rev=1204769&r1=1204768&r2=1204769&view=diff
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/util/TestFSTableDescriptors.java (original)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/util/TestFSTableDescriptors.java Tue Nov 22 01:04:40 2011
@@ -59,7 +59,7 @@ public class TestFSTableDescriptors {
     assertTrue(FSTableDescriptors.createTableDescriptor(fs, testdir, htd));
     assertFalse(FSTableDescriptors.createTableDescriptor(fs, testdir, htd));
     FileStatus [] statuses = fs.listStatus(testdir);
-    assertTrue(statuses.length == 1);
+    assertTrue("statuses.length="+statuses.length, statuses.length == 1);
     for (int i = 0; i < 10; i++) {
       FSTableDescriptors.updateHTableDescriptor(fs, testdir, htd);
     }



Mime
View raw message