From common-commits-return-84586-archive-asf-public=cust-asf.ponee.io@hadoop.apache.org Wed Jun 20 21:15:18 2018 Return-Path: X-Original-To: archive-asf-public@cust-asf.ponee.io Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by mx-eu-01.ponee.io (Postfix) with SMTP id A741718076D for ; Wed, 20 Jun 2018 21:15:16 +0200 (CEST) Received: (qmail 76816 invoked by uid 500); 20 Jun 2018 19:15:09 -0000 Mailing-List: contact common-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Delivered-To: mailing list common-commits@hadoop.apache.org Received: (qmail 73381 invoked by uid 99); 20 Jun 2018 19:15:06 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 20 Jun 2018 19:15:06 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 27907E114E; Wed, 20 Jun 2018 19:15:06 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: inigoiri@apache.org To: common-commits@hadoop.apache.org Date: Wed, 20 Jun 2018 19:15:31 -0000 Message-Id: <9b5cb7a9bacb46f3a9436f4a6e1ca4d2@git.apache.org> In-Reply-To: <021a532980de42d79d657f539a64073f@git.apache.org> References: <021a532980de42d79d657f539a64073f@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [27/66] [abbrv] hadoop git commit: HDFS-13641. Add metrics for edit log tailing. Contributed by Chao Sun. HDFS-13641. Add metrics for edit log tailing. Contributed by Chao Sun. Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/8e7548d3 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/8e7548d3 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/8e7548d3 Branch: refs/heads/HDFS-13532 Commit: 8e7548d33be9c4874daab18b2e774bdc2ed216d3 Parents: 6307962 Author: Yiqun Lin Authored: Wed Jun 13 20:05:55 2018 +0800 Committer: Yiqun Lin Committed: Wed Jun 13 20:05:55 2018 +0800 ---------------------------------------------------------------------- .../hadoop-common/src/site/markdown/Metrics.md | 12 ++++ .../org/apache/hadoop/test/MetricsAsserts.java | 22 ++++++-- .../hdfs/server/namenode/ha/EditLogTailer.java | 11 ++++ .../namenode/metrics/NameNodeMetrics.java | 59 +++++++++++++++++++- .../namenode/metrics/TestNameNodeMetrics.java | 41 ++++++++++++++ 5 files changed, 140 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/8e7548d3/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md b/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md index 2538491..676ab0b 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md @@ -181,6 +181,18 @@ Each metrics record contains tags such as ProcessName, SessionId, and Hostname a | `WarmUpEDEKTimeAvgTime` | Average time of warming up EDEK in milliseconds | | `ResourceCheckTime`*num*`s(50/75/90/95/99)thPercentileLatency` | The 50/75/90/95/99th percentile of NameNode resource check latency in milliseconds. Percentile measurement is off by default, by watching no intervals. The intervals are specified by `dfs.metrics.percentiles.intervals`. | | `StorageBlockReport`*num*`s(50/75/90/95/99)thPercentileLatency` | The 50/75/90/95/99th percentile of storage block report latency in milliseconds. Percentile measurement is off by default, by watching no intervals. The intervals are specified by `dfs.metrics.percentiles.intervals`. | +| `EditLogTailTimeNumOps` | Total number of times the standby NameNode tailed the edit log | +| `EditLogTailTimeAvgTime` | Average time (in milliseconds) spent by standby NameNode in tailing edit log | +| `EditLogTailTime`*num*`s(50/75/90/95/99)thPercentileLatency` | The 50/75/90/95/99th percentile of time spent in tailing edit logs by standby NameNode, in milliseconds. Percentile measurement is off by default, by watching no intervals. The intervals are specified by `dfs.metrics.percentiles.intervals`. | +| `EditLogFetchTimeNumOps` | Total number of times the standby NameNode fetched remote edit streams from journal nodes | +| `EditLogFetchTimeAvgTime` | Average time (in milliseconds) spent by standby NameNode in fetching remote edit streams from journal nodes | +| `EditLogFetchTime`*num*`s(50/75/90/95/99)thPercentileLatency` | The 50/75/90/95/99th percentile of time spent in fetching edit streams from journal nodes by standby NameNode, in milliseconds. Percentile measurement is off by default, by watching no intervals. The intervals are specified by `dfs.metrics.percentiles.intervals`. | +| `NumEditLogLoadedNumOps` | Total number of times edits were loaded by standby NameNode | +| `NumEditLogLoadedAvgCount` | Average number of edits loaded by standby NameNode in each edit log tailing | +| `NumEditLogLoaded`*num*`s(50/75/90/95/99)thPercentileCount` | The 50/75/90/95/99th percentile of number of edits loaded by standby NameNode in each edit log tailing. Percentile measurement is off by default, by watching no intervals. The intervals are specified by `dfs.metrics.percentiles.intervals`. | +| `EditLogTailIntervalNumOps` | Total number of intervals between edit log tailings by standby NameNode | +| `EditLogTailIntervalAvgTime` | Average time of intervals between edit log tailings by standby NameNode in milliseconds | +| `EditLogTailInterval`*num*`s(50/75/90/95/99)thPercentileLatency` | The 50/75/90/95/99th percentile of time between edit log tailings by standby NameNode, in milliseconds. Percentile measurement is off by default, by watching no intervals. The intervals are specified by `dfs.metrics.percentiles.intervals`. | FSNamesystem ------------ http://git-wip-us.apache.org/repos/asf/hadoop/blob/8e7548d3/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/MetricsAsserts.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/MetricsAsserts.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/MetricsAsserts.java index caa65c5..d384834 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/MetricsAsserts.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/MetricsAsserts.java @@ -367,17 +367,31 @@ public class MetricsAsserts { } /** - * Asserts that the NumOps and quantiles for a metric have been changed at - * some point to a non-zero value. + * Asserts that the NumOps and quantiles for a metric with value name + * "Latency" have been changed at some point to a non-zero value. * * @param prefix of the metric * @param rb MetricsRecordBuilder with the metric */ - public static void assertQuantileGauges(String prefix, + public static void assertQuantileGauges(String prefix, MetricsRecordBuilder rb) { + assertQuantileGauges(prefix, rb, "Latency"); + } + + /** + * Asserts that the NumOps and quantiles for a metric have been changed at + * some point to a non-zero value, for the specified value name of the + * metrics (e.g., "Latency", "Count"). + * + * @param prefix of the metric + * @param rb MetricsRecordBuilder with the metric + * @param valueName the value name for the metric + */ + public static void assertQuantileGauges(String prefix, + MetricsRecordBuilder rb, String valueName) { verify(rb).addGauge(eqName(info(prefix + "NumOps", "")), geq(0l)); for (Quantile q : MutableQuantiles.quantiles) { - String nameTemplate = prefix + "%dthPercentileLatency"; + String nameTemplate = prefix + "%dthPercentile" + valueName; int percentile = (int) (100 * q.quantile); verify(rb).addGauge( eqName(info(String.format(nameTemplate, percentile), "")), http://git-wip-us.apache.org/repos/asf/hadoop/blob/8e7548d3/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java index 73a111e..2003f94 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java @@ -62,6 +62,7 @@ import static org.apache.hadoop.util.ExitUtil.terminate; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; +import org.apache.hadoop.util.Time; /** @@ -292,6 +293,7 @@ public class EditLogTailer { LOG.debug("lastTxnId: " + lastTxnId); } Collection streams; + long startTime = Time.monotonicNow(); try { streams = editLog.selectInputStreams(lastTxnId + 1, 0, null, inProgressOk, true); @@ -302,6 +304,9 @@ public class EditLogTailer { LOG.warn("Edits tailer failed to find any streams. Will try again " + "later.", ioe); return; + } finally { + NameNode.getNameNodeMetrics().addEditLogFetchTime( + Time.monotonicNow() - startTime); } if (LOG.isDebugEnabled()) { LOG.debug("edit streams to load from: " + streams.size()); @@ -322,6 +327,7 @@ public class EditLogTailer { LOG.debug(String.format("Loaded %d edits starting from txid %d ", editsLoaded, lastTxnId)); } + NameNode.getNameNodeMetrics().addNumEditLogLoaded(editsLoaded); } if (editsLoaded > 0) { @@ -446,10 +452,15 @@ public class EditLogTailer { // name system lock will be acquired to further block even the block // state updates. namesystem.cpLockInterruptibly(); + long startTime = Time.monotonicNow(); try { + NameNode.getNameNodeMetrics().addEditLogTailInterval( + startTime - lastLoadTimeMs); doTailEdits(); } finally { namesystem.cpUnlock(); + NameNode.getNameNodeMetrics().addEditLogTailTime( + Time.monotonicNow() - startTime); } //Update NameDirSize Metric namesystem.getFSImage().getStorage().updateNameDirSize(); http://git-wip-us.apache.org/repos/asf/hadoop/blob/8e7548d3/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/NameNodeMetrics.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/NameNodeMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/NameNodeMetrics.java index 94c5e9e..5d43ebf 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/NameNodeMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/NameNodeMetrics.java @@ -32,6 +32,7 @@ import org.apache.hadoop.metrics2.lib.MutableCounterLong; import org.apache.hadoop.metrics2.lib.MutableGaugeInt; import org.apache.hadoop.metrics2.lib.MutableQuantiles; import org.apache.hadoop.metrics2.lib.MutableRate; +import org.apache.hadoop.metrics2.lib.MutableStat; import org.apache.hadoop.metrics2.source.JvmMetrics; /** @@ -134,6 +135,18 @@ public class NameNodeMetrics { @Metric("Time loading FS Image at startup in msec") MutableGaugeInt fsImageLoadTime; + @Metric("Time tailing edit logs in msec") + MutableRate editLogTailTime; + private final MutableQuantiles[] editLogTailTimeQuantiles; + @Metric MutableRate editLogFetchTime; + private final MutableQuantiles[] editLogFetchTimeQuantiles; + @Metric(value = "Number of edits loaded", valueName = "Count") + MutableStat numEditLogLoaded; + private final MutableQuantiles[] numEditLogLoadedQuantiles; + @Metric("Time between edit log tailing in msec") + MutableRate editLogTailInterval; + private final MutableQuantiles[] editLogTailIntervalQuantiles; + @Metric("GetImageServlet getEdit") MutableRate getEdit; @Metric("GetImageServlet getImage") @@ -156,7 +169,11 @@ public class NameNodeMetrics { generateEDEKTimeQuantiles = new MutableQuantiles[len]; warmUpEDEKTimeQuantiles = new MutableQuantiles[len]; resourceCheckTimeQuantiles = new MutableQuantiles[len]; - + editLogTailTimeQuantiles = new MutableQuantiles[len]; + editLogFetchTimeQuantiles = new MutableQuantiles[len]; + numEditLogLoadedQuantiles = new MutableQuantiles[len]; + editLogTailIntervalQuantiles = new MutableQuantiles[len]; + for (int i = 0; i < len; i++) { int interval = intervals[i]; syncsQuantiles[i] = registry.newQuantiles( @@ -181,6 +198,18 @@ public class NameNodeMetrics { resourceCheckTimeQuantiles[i] = registry.newQuantiles( "resourceCheckTime" + interval + "s", "resource check time", "ops", "latency", interval); + editLogTailTimeQuantiles[i] = registry.newQuantiles( + "editLogTailTime" + interval + "s", + "Edit log tailing time", "ops", "latency", interval); + editLogFetchTimeQuantiles[i] = registry.newQuantiles( + "editLogFetchTime" + interval + "s", + "Edit log fetch time", "ops", "latency", interval); + numEditLogLoadedQuantiles[i] = registry.newQuantiles( + "numEditLogLoaded" + interval + "s", + "Number of edits loaded", "ops", "count", interval); + editLogTailIntervalQuantiles[i] = registry.newQuantiles( + "editLogTailInterval" + interval + "s", + "Edit log tailing interval", "ops", "latency", interval); } } @@ -389,4 +418,32 @@ public class NameNodeMetrics { q.add(latency); } } + + public void addEditLogTailTime(long elapsed) { + editLogTailTime.add(elapsed); + for (MutableQuantiles q : editLogTailTimeQuantiles) { + q.add(elapsed); + } + } + + public void addEditLogFetchTime(long elapsed) { + editLogFetchTime.add(elapsed); + for (MutableQuantiles q : editLogFetchTimeQuantiles) { + q.add(elapsed); + } + } + + public void addNumEditLogLoaded(long loaded) { + numEditLogLoaded.add(loaded); + for (MutableQuantiles q : numEditLogLoadedQuantiles) { + q.add(loaded); + } + } + + public void addEditLogTailInterval(long elapsed) { + editLogTailInterval.add(elapsed); + for (MutableQuantiles q : editLogTailIntervalQuantiles) { + q.add(elapsed); + } + } } http://git-wip-us.apache.org/repos/asf/hadoop/blob/8e7548d3/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java ---------------------------------------------------------------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java index 92bc51c..e34deea 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java @@ -32,6 +32,7 @@ import org.apache.hadoop.hdfs.client.HdfsAdmin; import static org.apache.hadoop.fs.CommonConfigurationKeys.HA_HM_RPC_TIMEOUT_DEFAULT; import static org.apache.hadoop.fs.CommonConfigurationKeys.HA_HM_RPC_TIMEOUT_KEY; import static org.apache.hadoop.test.MetricsAsserts.assertCounter; +import static org.apache.hadoop.test.MetricsAsserts.assertCounterGt; import static org.apache.hadoop.test.MetricsAsserts.assertGauge; import static org.apache.hadoop.test.MetricsAsserts.assertQuantileGauges; import static org.apache.hadoop.test.MetricsAsserts.getMetrics; @@ -1059,4 +1060,44 @@ public class TestNameNodeMetrics { } } } + + @Test + public void testEditLogTailing() throws Exception { + HdfsConfiguration conf = new HdfsConfiguration(); + conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1); + conf.setInt(DFSConfigKeys.DFS_METRICS_PERCENTILES_INTERVALS_KEY, 60); + MiniDFSCluster dfsCluster = null; + try { + dfsCluster = new MiniDFSCluster.Builder(conf) + .numDataNodes(0) + .nnTopology(MiniDFSNNTopology.simpleHATopology()) + .build(); + DistributedFileSystem dfs = dfsCluster.getFileSystem(0); + dfsCluster.transitionToActive(0); + dfsCluster.waitActive(); + + Path testDir = new Path("/testdir"); + dfs.mkdir(testDir, FsPermission.getDefault()); + + dfsCluster.getNameNodeRpc(0).rollEditLog(); + Thread.sleep(2 * 1000); + + // We need to get the metrics for the SBN (excluding the NN from dfs + // cluster created in setUp() and the ANN). + MetricsRecordBuilder rb = getMetrics(NN_METRICS+"-2"); + assertQuantileGauges("EditLogTailTime60s", rb); + assertQuantileGauges("EditLogFetchTime60s", rb); + assertQuantileGauges("NumEditLogLoaded60s", rb, "Count"); + assertQuantileGauges("EditLogTailInterval60s", rb); + assertCounterGt("EditLogTailTimeNumOps", 0L, rb); + assertCounterGt("EditLogFetchTimeNumOps", 0L, rb); + assertCounterGt("NumEditLogLoadedNumOps", 0L, rb); + assertCounterGt("EditLogTailIntervalNumOps", 0L, rb); + } finally { + if (dfsCluster != null) { + dfsCluster.shutdown(); + } + } + + } } --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org For additional commands, e-mail: common-commits-help@hadoop.apache.org