hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jmhs...@apache.org
Subject svn commit: r1310159 - in /hbase/trunk/src: main/jamon/org/apache/hadoop/hbase/tmpl/master/ main/java/org/apache/hadoop/hbase/ main/java/org/apache/hadoop/hbase/master/ main/java/org/apache/hadoop/hbase/master/metrics/ test/java/org/apache/hadoop/hbase...
Date Fri, 06 Apr 2012 03:46:05 GMT
Author: jmhsieh
Date: Fri Apr  6 03:46:04 2012
New Revision: 1310159

URL: http://svn.apache.org/viewvc?rev=1310159&view=rev
Log:
HBASE-4348 Add metrics for regions in transition (Himanshu Vashishtha)

Modified:
    hbase/trunk/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/AssignmentManagerStatusTmpl.jamon
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/HConstants.java
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/metrics/MasterMetrics.java
    hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java

Modified: hbase/trunk/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/AssignmentManagerStatusTmpl.jamon
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/AssignmentManagerStatusTmpl.jamon?rev=1310159&r1=1310158&r2=1310159&view=diff
==============================================================================
--- hbase/trunk/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/AssignmentManagerStatusTmpl.jamon
(original)
+++ hbase/trunk/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/AssignmentManagerStatusTmpl.jamon
Fri Apr  6 03:46:04 2012
@@ -21,6 +21,9 @@ limitations under the License.
 org.apache.hadoop.hbase.HRegionInfo;
 org.apache.hadoop.hbase.master.AssignmentManager;
 org.apache.hadoop.hbase.master.AssignmentManager.RegionState;
+org.apache.hadoop.conf.Configuration;
+org.apache.hadoop.hbase.HBaseConfiguration;
+org.apache.hadoop.hbase.HConstants;
 java.util.Iterator;
 java.util.Map;
 </%import>
@@ -30,6 +33,23 @@ int limit = 100;
 </%args>
 <%java>
 Map<String, RegionState> rit = assignmentManager.getRegionsInTransition();
+// process the map to find region in transition details
+Configuration conf = HBaseConfiguration.create();
+int ritThreshold = conf.getInt(HConstants.METRICS_RIT_STUCK_WARNING_THRESHOLD, 60000);
+int numOfRITOverThreshold = 0;
+long maxRITTime = Long.MIN_VALUE;
+long currentTime = System.currentTimeMillis();
+String regionIDForOldestRIT = ""; // avoiding null
+for (Map.Entry<String, RegionState> e : rit.entrySet()) {
+  long ritTime = currentTime - e.getValue().getStamp();
+  if(ritTime > ritThreshold) {
+     numOfRITOverThreshold++;
+   }
+   if(maxRITTime < ritTime) {
+     maxRITTime = ritTime;
+     regionIDForOldestRIT = e.getKey();
+   }
+}
 
 int toRemove = rit.size() - limit;
 int removed = 0;
@@ -42,8 +62,8 @@ if (toRemove > 0) {
     if (HRegionInfo.FIRST_META_REGIONINFO.getEncodedName().equals(
           e.getKey()) ||
         HRegionInfo.ROOT_REGIONINFO.getEncodedName().equals(
-          e.getKey())) {
-      // don't remove the meta regions, they're too interesting!
+          e.getKey()) || regionIDForOldestRIT.equals(e.getKey())) {
+      // don't remove the meta & the oldest rit regions, they're too interesting!
       continue;
     } 
     it.remove();
@@ -58,13 +78,23 @@ if (toRemove > 0) {
 <%if rit.isEmpty() %>
 No regions in transition.
 <%else>
-	<table>
-		<tr><th>Region</th><th>State</th></tr>
-		<%for Map.Entry<String, RegionState> entry : rit.entrySet() %>
-		<tr><td><% entry.getKey() %></td><td><% entry.getValue().toDescriptiveString()
%></td>
-		</%for>
-	</table>
-	<%if removed > 0 %>
-	(<% removed %> more regions in transition not shown) 
-	</%if>
-</%if>
\ No newline at end of file
+        <table>
+                <tr><th>Region</th><th>State</th><th>RIT
time (ms)</th></tr>
+                <%for Map.Entry<String, RegionState> entry : rit.entrySet() %>
+                <%if regionIDForOldestRIT.equals(entry.getKey()) %>
+                        <tr BGCOLOR="#FE2E2E" >
+                <%else>
+                        <tr>
+                </%if>
+                <td><% entry.getKey() %></td><td><% entry.getValue().toDescriptiveString()
%></td>
+		<td><% (currentTime - entry.getValue().getStamp()) %> </td></tr>
+                </%for>
+                <tr BGCOLOR="#D7DF01"> <td>Total number of Regions in Transition
for more than <% ritThreshold %> milliseconds</td><td> <% numOfRITOverThreshold
%></td><td></td>
+                </tr>
+		<tr> <td> Total number of Regions in Transition</td><td><% rit.size()
%> </td><td></td>
+        </table>
+        <%if removed > 0 %>
+        (<% removed %> more regions in transition not shown)
+        </%if>
+</%if>
+

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/HConstants.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/HConstants.java?rev=1310159&r1=1310158&r2=1310159&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/HConstants.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/HConstants.java Fri Apr  6 03:46:04
2012
@@ -651,6 +651,9 @@ public final class HConstants {
   public static final String ENABLE_WAL_COMPRESSION =
     "hbase.regionserver.wal.enablecompression";
 
+/** Region in Transition metrics threshold time */
+  public static final String METRICS_RIT_STUCK_WARNING_THRESHOLD="hbase.metrics.rit.stuck.warning.threshold";
+
   private HConstants() {
     // Can't be instantiated with this ctor.
   }

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java?rev=1310159&r1=1310158&r2=1310159&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java Fri Apr
 6 03:46:04 2012
@@ -71,6 +71,7 @@ import org.apache.hadoop.hbase.master.ha
 import org.apache.hadoop.hbase.master.handler.OpenedRegionHandler;
 import org.apache.hadoop.hbase.master.handler.ServerShutdownHandler;
 import org.apache.hadoop.hbase.master.handler.SplitRegionHandler;
+import org.apache.hadoop.hbase.master.metrics.MasterMetrics;
 import org.apache.hadoop.hbase.regionserver.RegionAlreadyInTransitionException;
 import org.apache.hadoop.hbase.regionserver.RegionOpeningState;
 import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException;
@@ -184,6 +185,9 @@ public class AssignmentManager extends Z
   private Map<String, HRegionInfo> failoverProcessedRegions =
     new HashMap<String, HRegionInfo>();
 
+   // metrics instance to send metrics for RITs
+   MasterMetrics masterMetrics;
+
   /**
    * Constructs a new assignment manager.
    *
@@ -195,7 +199,7 @@ public class AssignmentManager extends Z
    * @throws IOException 
    */
   public AssignmentManager(Server master, ServerManager serverManager,
-      CatalogTracker catalogTracker, final ExecutorService service)
+      CatalogTracker catalogTracker, final ExecutorService service, MasterMetrics metrics)
   throws KeeperException, IOException {
     super(master.getZooKeeper());
     this.master = master;
@@ -216,6 +220,7 @@ public class AssignmentManager extends Z
       this.master.getConfiguration().getInt("hbase.assignment.maximum.attempts", 10);
     this.balancer = LoadBalancerFactory.getLoadBalancer(conf);
     this.threadPoolExecutorService = Executors.newCachedThreadPool();
+    this.masterMetrics = metrics;// can be null only with tests.
   }
 
   /**
@@ -2707,6 +2712,38 @@ public class AssignmentManager extends Z
   }
 
   /**
+   * Set Regions in transitions metrics.
+   * This takes an iterator on the RegionInTransition map (CLSM), and is not synchronized.
+   * This iterator is not fail-fast, which may lead to stale reads; but that's better than
+   * creating a copy of the map for metrics computation, as this method will be invoked
+   * on a frequent interval.
+   */
+  public void updateRegionsInTransitionMetrics() {
+    long currentTime = System.currentTimeMillis();
+    int totalRITs = 0;
+    int totalRITsOverThreshold = 0;
+    long oldestRITTime = 0;
+    int ritThreshold = this.master.getConfiguration().
+      getInt(HConstants.METRICS_RIT_STUCK_WARNING_THRESHOLD, 60000);
+    for (Map.Entry<String, RegionState> e : this.regionsInTransition.
+        entrySet()) {
+      totalRITs++;
+      long ritTime = currentTime - e.getValue().getStamp();
+      if (ritTime > ritThreshold) { // more than the threshold
+        totalRITsOverThreshold++;
+      }
+      if (oldestRITTime < ritTime) {
+        oldestRITTime = ritTime;
+      }
+    }
+    if (this.masterMetrics != null) {
+      this.masterMetrics.updateRITOldestAge(oldestRITTime);
+      this.masterMetrics.updateRITCount(totalRITs);
+      this.masterMetrics.updateRITCountOverThreshold(totalRITsOverThreshold);
+    }
+  }
+
+  /**
    * @return True if regions in transition.
    */
   public boolean isRegionsInTransition() {

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMaster.java?rev=1310159&r1=1310158&r2=1310159&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMaster.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/HMaster.java Fri Apr  6 03:46:04
2012
@@ -221,6 +221,8 @@ Server {
   private long masterStartTime;
   private long masterActiveTime;
 
+  /** time interval for emitting metrics values */
+  private final int msgInterval;
   /**
    * MX Bean for MasterInfo
    */
@@ -290,6 +292,8 @@ Server {
     this.zooKeeper = new ZooKeeperWatcher(conf, MASTER + ":" + isa.getPort(), this, true);
     this.rpcServer.startThreads();
     this.metrics = new MasterMetrics(getServerName().toString());
+    // metrics interval: using the same property as region server.
+    this.msgInterval = conf.getInt("hbase.regionserver.msginterval", 3 * 1000);
   }
 
   /**
@@ -412,7 +416,7 @@ Server {
     this.catalogTracker.start();
 
     this.assignmentManager = new AssignmentManager(this, serverManager,
-        this.catalogTracker, this.executorService);
+        this.catalogTracker, this.executorService, this.metrics);
     this.balancer = LoadBalancerFactory.getLoadBalancer(conf);
     zooKeeper.registerListenerFirst(assignmentManager);
 
@@ -456,13 +460,33 @@ Server {
 
   // Check if we should stop every 100ms
   private Sleeper stopSleeper = new Sleeper(100, this);
+
   private void loop() {
+    long lastMsgTs = 0l;
+    long now = 0l;
     while (!this.stopped) {
+      now = System.currentTimeMillis();
+      if ((now - lastMsgTs) >= this.msgInterval) {
+        doMetrics();
+        lastMsgTs = System.currentTimeMillis();
+      }
       stopSleeper.sleep();
     }
   }
 
   /**
+   * Emit the HMaster metrics, such as region in transition metrics.
+   * Wrapped in a try block so that a metrics failure doesn't abort HMaster.
+   */
+  private void doMetrics() {
+    try {
+      this.assignmentManager.updateRegionsInTransitionMetrics();
+    } catch (Throwable e) {
+      LOG.error("Couldn't update metrics: " + e.getMessage());
+    }
+  }
+
+/**
    * Finish initialization of HMaster after becoming the primary master.
    *
    * <ol>

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/metrics/MasterMetrics.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/metrics/MasterMetrics.java?rev=1310159&r1=1310158&r2=1310159&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/metrics/MasterMetrics.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/metrics/MasterMetrics.java Fri
Apr  6 03:46:04 2012
@@ -31,6 +31,7 @@ import org.apache.hadoop.metrics.Metrics
 import org.apache.hadoop.metrics.MetricsUtil;
 import org.apache.hadoop.metrics.Updater;
 import org.apache.hadoop.metrics.jvm.JvmMetrics;
+import org.apache.hadoop.metrics.util.MetricsIntValue;
 import org.apache.hadoop.metrics.util.MetricsLongValue;
 import org.apache.hadoop.metrics.util.MetricsRegistry;
 
@@ -66,6 +67,18 @@ public class MasterMetrics implements Up
   final PersistentMetricsTimeVaryingRate splitSize =
     new PersistentMetricsTimeVaryingRate("splitSize", registry);
 
+  /**
+    * Regions in Transition metrics such as number of RIT regions, oldest
+    * RIT time and number of such regions that are in transition
+    * for more than a specified threshold.
+    */
+  public final MetricsIntValue ritCount =
+    new MetricsIntValue("ritCount", registry);
+  public final MetricsIntValue ritCountOverThreshold =
+    new MetricsIntValue("ritCountOverThreshold", registry);
+  public final MetricsLongValue ritOldestAge =
+    new MetricsLongValue("ritOldestAge", registry);
+
   public MasterMetrics(final String name) {
     MetricsContext context = MetricsUtil.getContext("hbase");
     metricsRecord = MetricsUtil.createRecord(context, "master");
@@ -117,6 +130,9 @@ public class MasterMetrics implements Up
       this.cluster_requests.pushMetric(metricsRecord);
       this.splitTime.pushMetric(metricsRecord);
       this.splitSize.pushMetric(metricsRecord);
+      this.ritCount.pushMetric(metricsRecord);
+      this.ritCountOverThreshold.pushMetric(metricsRecord);
+      this.ritOldestAge.pushMetric(metricsRecord);
     }
     this.metricsRecord.update();
   }
@@ -148,4 +164,28 @@ public class MasterMetrics implements Up
   public void incrementRequests(final int inc) {
     this.cluster_requests.inc(inc);
   }
+
+  /**
+   * set new value for number of regions in transition.
+   * @param ritCount
+   */
+  public void updateRITCount(int ritCount) {
+    this.ritCount.set(ritCount);
+  }
+
+  /**
+   * update the count of regions that have been in transition for more than the threshold
+   * defined by the property hbase.metrics.rit.stuck.warning.threshold.
+   * @param ritCountOverThreshold
+   */
+  public void updateRITCountOverThreshold(int ritCountOverThreshold) {
+    this.ritCountOverThreshold.set(ritCountOverThreshold);
+  }
+  /**
+   * update the age (time spent in transition, in ms) of the oldest region in transition.
+   * @param timestamp
+   */
+  public void updateRITOldestAge(long timestamp) {
+    this.ritOldestAge.set(timestamp);
+  }
 }

Modified: hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java?rev=1310159&r1=1310158&r2=1310159&view=diff
==============================================================================
--- hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java (original)
+++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java Fri
Apr  6 03:46:04 2012
@@ -307,7 +307,7 @@ public class TestAssignmentManager {
     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
     // Create an AM.
     AssignmentManager am =
-      new AssignmentManager(this.server, this.serverManager, ct, executor);
+      new AssignmentManager(this.server, this.serverManager, ct, executor, null);
     try {
       // Make sure our new AM gets callbacks; once registered, can't unregister.
       // Thats ok because we make a new zk watcher for each test.
@@ -372,7 +372,7 @@ public class TestAssignmentManager {
     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
     // Create an AM.
     AssignmentManager am =
-      new AssignmentManager(this.server, this.serverManager, ct, executor);
+      new AssignmentManager(this.server, this.serverManager, ct, executor, null);
     try {
       // Make sure our new AM gets callbacks; once registered, can't unregister.
       // Thats ok because we make a new zk watcher for each test.
@@ -446,7 +446,7 @@ public class TestAssignmentManager {
     CatalogTracker ct = Mockito.mock(CatalogTracker.class);
     // Create an AM.
     AssignmentManager am =
-      new AssignmentManager(this.server, this.serverManager, ct, null);
+      new AssignmentManager(this.server, this.serverManager, ct, null, null);
     try {
       // First make sure my mock up basically works.  Unassign a region.
       unassign(am, SERVERNAME_A, hri);
@@ -578,7 +578,7 @@ public class TestAssignmentManager {
         final ServerManager serverManager,
         final CatalogTracker catalogTracker, final ExecutorService service)
     throws KeeperException, IOException {
-      super(master, serverManager, catalogTracker, service);
+      super(master, serverManager, catalogTracker, service, null);
       this.es = service;
       this.ct = catalogTracker;
     }



Mime
View raw message