From common-commits-return-93709-archive-asf-public=cust-asf.ponee.io@hadoop.apache.org Thu Mar 14 19:42:09 2019 Return-Path: X-Original-To: archive-asf-public@cust-asf.ponee.io Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by mx-eu-01.ponee.io (Postfix) with SMTP id 43820180630 for ; Thu, 14 Mar 2019 20:42:09 +0100 (CET) Received: (qmail 19985 invoked by uid 500); 14 Mar 2019 19:42:08 -0000 Mailing-List: contact common-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Delivered-To: mailing list common-commits@hadoop.apache.org Received: (qmail 19976 invoked by uid 99); 14 Mar 2019 19:42:08 -0000 Received: from ec2-52-202-80-70.compute-1.amazonaws.com (HELO gitbox.apache.org) (52.202.80.70) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 14 Mar 2019 19:42:08 +0000 Received: by gitbox.apache.org (ASF Mail Server at gitbox.apache.org, from userid 33) id 84224852BB; Thu, 14 Mar 2019 19:42:07 +0000 (UTC) Date: Thu, 14 Mar 2019 19:42:07 +0000 To: "common-commits@hadoop.apache.org" Subject: [hadoop] branch trunk updated: HDDS-917. Expose NodeManagerMXBean as a MetricsSource. Contributed by Siddharth Wagle. MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8bit Message-ID: <155259252737.23115.15897086428675781891@gitbox.apache.org> From: bharat@apache.org X-Git-Host: gitbox.apache.org X-Git-Repo: hadoop X-Git-Refname: refs/heads/trunk X-Git-Reftype: branch X-Git-Oldrev: 688b177fc6402fc9b00b1312bffbe8226c5ea502 X-Git-Newrev: 091a664977a3b97cd6057129da9d093a73d63a68 X-Git-Rev: 091a664977a3b97cd6057129da9d093a73d63a68 X-Git-NotificationType: ref_changed_plus_diff X-Git-Multimail-Version: 1.5.dev Auto-Submitted: auto-generated This is an automated email from the ASF dual-hosted git repository. 
bharat pushed a commit to branch trunk in repository https://gitbox.apache.org/repos/asf/hadoop.git The following commit(s) were added to refs/heads/trunk by this push: new 091a664 HDDS-917. Expose NodeManagerMXBean as a MetricsSource. Contributed by Siddharth Wagle. 091a664 is described below commit 091a664977a3b97cd6057129da9d093a73d63a68 Author: Bharat Viswanadham AuthorDate: Thu Mar 14 12:30:06 2019 -0700 HDDS-917. Expose NodeManagerMXBean as a MetricsSource. Contributed by Siddharth Wagle. --- .../hadoop/hdds/scm/node/SCMNodeManager.java | 4 +- .../hadoop/hdds/scm/node/SCMNodeMetrics.java | 76 ++++++++++++++++++++-- .../hadoop/ozone/scm/node/TestSCMNodeMetrics.java | 40 ++++++++++++ 3 files changed, 114 insertions(+), 6 deletions(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java index e457b13..4464ed1 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java @@ -102,7 +102,6 @@ public class SCMNodeManager implements NodeManager { public SCMNodeManager(OzoneConfiguration conf, String clusterID, StorageContainerManager scmManager, EventPublisher eventPublisher) throws IOException { - this.metrics = SCMNodeMetrics.create(); this.nodeStateManager = new NodeStateManager(conf, eventPublisher); this.clusterID = clusterID; this.version = VersionInfo.getLatestVersion(); @@ -110,6 +109,7 @@ public class SCMNodeManager implements NodeManager { this.scmManager = scmManager; LOG.info("Entering startup chill mode."); registerMXBean(); + this.metrics = SCMNodeMetrics.create(this); } private void registerMXBean() { @@ -118,7 +118,7 @@ public class SCMNodeManager implements NodeManager { } private void unregisterMXBean() { - if(this.nmInfoBean != null) { + if (this.nmInfoBean != null) { 
MBeans.unregister(this.nmInfoBean); this.nmInfoBean = null; } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeMetrics.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeMetrics.java index 30b1079..1596523 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeMetrics.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeMetrics.java @@ -18,11 +18,24 @@ package org.apache.hadoop.hdds.scm.node; +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.DEAD; +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.DECOMMISSIONED; +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.DECOMMISSIONING; +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.HEALTHY; +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.STALE; + +import java.util.Map; + import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.metrics2.MetricsCollector; +import org.apache.hadoop.metrics2.MetricsInfo; +import org.apache.hadoop.metrics2.MetricsSource; import org.apache.hadoop.metrics2.MetricsSystem; import org.apache.hadoop.metrics2.annotation.Metric; import org.apache.hadoop.metrics2.annotation.Metrics; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; +import org.apache.hadoop.metrics2.lib.Interns; +import org.apache.hadoop.metrics2.lib.MetricsRegistry; import org.apache.hadoop.metrics2.lib.MutableCounterLong; /** @@ -30,7 +43,7 @@ import org.apache.hadoop.metrics2.lib.MutableCounterLong; */ @InterfaceAudience.Private @Metrics(about = "SCM NodeManager Metrics", context = "ozone") -public final class SCMNodeMetrics { +public final class SCMNodeMetrics implements MetricsSource { private static final String SOURCE_NAME = SCMNodeMetrics.class.getSimpleName(); @@ -40,18 +53,26 @@ public final class SCMNodeMetrics { private @Metric 
MutableCounterLong numNodeReportProcessed; private @Metric MutableCounterLong numNodeReportProcessingFailed; + private final MetricsRegistry registry; + private final NodeManagerMXBean managerMXBean; + private final MetricsInfo recordInfo = Interns.info("SCMNodeManager", + "SCM NodeManager metrics"); + /** Private constructor. */ - private SCMNodeMetrics() { } + private SCMNodeMetrics(NodeManagerMXBean managerMXBean) { + this.managerMXBean = managerMXBean; + this.registry = new MetricsRegistry(recordInfo); + } /** * Create and returns SCMNodeMetrics instance. * * @return SCMNodeMetrics */ - public static SCMNodeMetrics create() { + public static SCMNodeMetrics create(NodeManagerMXBean managerMXBean) { MetricsSystem ms = DefaultMetricsSystem.instance(); return ms.register(SOURCE_NAME, "SCM NodeManager Metrics", - new SCMNodeMetrics()); + new SCMNodeMetrics(managerMXBean)); } /** @@ -90,4 +111,51 @@ public final class SCMNodeMetrics { numNodeReportProcessingFailed.incr(); } + /** + * Get aggregated counter and gauge metrics. 
+ */ + @Override + @SuppressWarnings("SuspiciousMethodCalls") + public void getMetrics(MetricsCollector collector, boolean all) { + Map nodeCount = managerMXBean.getNodeCount(); + Map nodeInfo = managerMXBean.getNodeInfo(); + + registry.snapshot( + collector.addRecord(registry.info()) // Add annotated ones first + .addGauge(Interns.info( + "HealthyNodes", + "Number of healthy datanodes"), + nodeCount.get(HEALTHY.toString())) + .addGauge(Interns.info("StaleNodes", + "Number of stale datanodes"), + nodeCount.get(STALE.toString())) + .addGauge(Interns.info("DeadNodes", + "Number of dead datanodes"), + nodeCount.get(DEAD.toString())) + .addGauge(Interns.info("DecommissioningNodes", + "Number of decommissioning datanodes"), + nodeCount.get(DECOMMISSIONING.toString())) + .addGauge(Interns.info("DecommissionedNodes", + "Number of decommissioned datanodes"), + nodeCount.get(DECOMMISSIONED.toString())) + .addGauge(Interns.info("DiskCapacity", + "Total disk capacity"), + nodeInfo.get("DISKCapacity")) + .addGauge(Interns.info("DiskUsed", + "Total disk capacity used"), + nodeInfo.get("DISKUsed")) + .addGauge(Interns.info("DiskRemaining", + "Total disk capacity remaining"), + nodeInfo.get("DISKRemaining")) + .addGauge(Interns.info("SSDCapacity", + "Total ssd capacity"), + nodeInfo.get("SSDCapacity")) + .addGauge(Interns.info("SSDUsed", + "Total ssd capacity used"), + nodeInfo.get("SSDUsed")) + .addGauge(Interns.info("SSDRemaining", + "Total disk capacity remaining"), + nodeInfo.get("SSDRemaining")), + all); + } } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestSCMNodeMetrics.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestSCMNodeMetrics.java index c18ae5f..d19be93 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestSCMNodeMetrics.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestSCMNodeMetrics.java @@ -35,6 
+35,7 @@ import org.junit.Before; import org.junit.Test; import static org.apache.hadoop.test.MetricsAsserts.assertCounter; +import static org.apache.hadoop.test.MetricsAsserts.assertGauge; import static org.apache.hadoop.test.MetricsAsserts.getLongCounter; import static org.apache.hadoop.test.MetricsAsserts.getMetrics; @@ -128,6 +129,45 @@ public class TestSCMNodeMetrics { getMetrics(SCMNodeMetrics.class.getSimpleName())); } + /** + * Verify that datanode aggregated state and capacity metrics are reported. + */ + @Test + public void testNodeCountAndInfoMetricsReported() throws Exception { + HddsDatanodeService datanode = cluster.getHddsDatanodes().get(0); + StorageReportProto storageReport = TestUtils.createStorageReport( + datanode.getDatanodeDetails().getUuid(), "/tmp", 100, 10, 90, null); + NodeReportProto nodeReport = NodeReportProto.newBuilder() + .addStorageReport(storageReport).build(); + datanode.getDatanodeStateMachine().getContext().addReport(nodeReport); + datanode.getDatanodeStateMachine().triggerHeartbeat(); + // Give some time so that SCM receives and processes the heartbeat. 
+ Thread.sleep(300L); + + assertGauge("HealthyNodes", 1, + getMetrics(SCMNodeMetrics.class.getSimpleName())); + assertGauge("StaleNodes", 0, + getMetrics(SCMNodeMetrics.class.getSimpleName())); + assertGauge("DeadNodes", 0, + getMetrics(SCMNodeMetrics.class.getSimpleName())); + assertGauge("DecommissioningNodes", 0, + getMetrics(SCMNodeMetrics.class.getSimpleName())); + assertGauge("DecommissionedNodes", 0, + getMetrics(SCMNodeMetrics.class.getSimpleName())); + assertGauge("DiskCapacity", 100L, + getMetrics(SCMNodeMetrics.class.getSimpleName())); + assertGauge("DiskUsed", 10L, + getMetrics(SCMNodeMetrics.class.getSimpleName())); + assertGauge("DiskRemaining", 90L, + getMetrics(SCMNodeMetrics.class.getSimpleName())); + assertGauge("SSDCapacity", 0L, + getMetrics(SCMNodeMetrics.class.getSimpleName())); + assertGauge("SSDUsed", 0L, + getMetrics(SCMNodeMetrics.class.getSimpleName())); + assertGauge("SSDRemaining", 0L, + getMetrics(SCMNodeMetrics.class.getSimpleName())); + } + @After public void teardown() { cluster.shutdown(); --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org For additional commands, e-mail: common-commits-help@hadoop.apache.org