hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From inigo...@apache.org
Subject [27/66] [abbrv] hadoop git commit: HDFS-13641. Add metrics for edit log tailing. Contributed by Chao Sun.
Date Wed, 20 Jun 2018 19:15:31 GMT
HDFS-13641. Add metrics for edit log tailing. Contributed by Chao Sun.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/8e7548d3
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/8e7548d3
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/8e7548d3

Branch: refs/heads/HDFS-13532
Commit: 8e7548d33be9c4874daab18b2e774bdc2ed216d3
Parents: 6307962
Author: Yiqun Lin <yqlin@apache.org>
Authored: Wed Jun 13 20:05:55 2018 +0800
Committer: Yiqun Lin <yqlin@apache.org>
Committed: Wed Jun 13 20:05:55 2018 +0800

----------------------------------------------------------------------
 .../hadoop-common/src/site/markdown/Metrics.md  | 12 ++++
 .../org/apache/hadoop/test/MetricsAsserts.java  | 22 ++++++--
 .../hdfs/server/namenode/ha/EditLogTailer.java  | 11 ++++
 .../namenode/metrics/NameNodeMetrics.java       | 59 +++++++++++++++++++-
 .../namenode/metrics/TestNameNodeMetrics.java   | 41 ++++++++++++++
 5 files changed, 140 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/8e7548d3/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md b/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md
index 2538491..676ab0b 100644
--- a/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md
+++ b/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md
@@ -181,6 +181,18 @@ Each metrics record contains tags such as ProcessName, SessionId, and
Hostname a
 | `WarmUpEDEKTimeAvgTime` | Average time of warming up EDEK in milliseconds |
 | `ResourceCheckTime`*num*`s(50/75/90/95/99)thPercentileLatency` | The 50/75/90/95/99th percentile
of NameNode resource check latency in milliseconds. Percentile measurement is off by default,
by watching no intervals. The intervals are specified by `dfs.metrics.percentiles.intervals`.
|
 | `StorageBlockReport`*num*`s(50/75/90/95/99)thPercentileLatency` | The 50/75/90/95/99th
percentile of storage block report latency in milliseconds. Percentile measurement is off
by default, by watching no intervals. The intervals are specified by `dfs.metrics.percentiles.intervals`.
|
+| `EditLogTailTimeNumOps` | Total number of times the standby NameNode tailed the edit log
|
+| `EditLogTailTimeAvgTime` | Average time (in milliseconds) spent by standby NameNode in
tailing edit log |
+| `EditLogTailTime`*num*`s(50/75/90/95/99)thPercentileLatency` | The 50/75/90/95/99th percentile
of time spent in tailing edit logs by standby NameNode, in milliseconds. Percentile measurement
is off by default, by watching no intervals. The intervals are specified by `dfs.metrics.percentiles.intervals`.
|
+| `EditLogFetchTimeNumOps` | Total number of times the standby NameNode fetched remote edit
streams from journal nodes |
+| `EditLogFetchTimeAvgTime` | Average time (in milliseconds) spent by standby NameNode in
fetching remote edit streams from journal nodes |
+| `EditLogFetchTime`*num*`s(50/75/90/95/99)thPercentileLatency` | The 50/75/90/95/99th percentile
of time spent in fetching edit streams from journal nodes by standby NameNode, in milliseconds.
Percentile measurement is off by default, by watching no intervals. The intervals are specified
by `dfs.metrics.percentiles.intervals`. |
+| `NumEditLogLoadedNumOps` | Total number of times edits were loaded by standby NameNode
|
+| `NumEditLogLoadedAvgCount` | Average number of edits loaded by standby NameNode in each
edit log tailing |
+| `NumEditLogLoaded`*num*`s(50/75/90/95/99)thPercentileCount` | The 50/75/90/95/99th percentile
of number of edits loaded by standby NameNode in each edit log tailing. Percentile measurement
is off by default, by watching no intervals. The intervals are specified by `dfs.metrics.percentiles.intervals`.
|
+| `EditLogTailIntervalNumOps` | Total number of intervals between edit log tailings by standby
NameNode |
+| `EditLogTailIntervalAvgTime` | Average time of intervals between edit log tailings by standby
NameNode in milliseconds |
+| `EditLogTailInterval`*num*`s(50/75/90/95/99)thPercentileLatency` | The 50/75/90/95/99th
percentile of time between edit log tailings by standby NameNode, in milliseconds. Percentile
measurement is off by default, by watching no intervals. The intervals are specified by `dfs.metrics.percentiles.intervals`.
|
 
 FSNamesystem
 ------------

http://git-wip-us.apache.org/repos/asf/hadoop/blob/8e7548d3/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/MetricsAsserts.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/MetricsAsserts.java
b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/MetricsAsserts.java
index caa65c5..d384834 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/MetricsAsserts.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/MetricsAsserts.java
@@ -367,17 +367,31 @@ public class MetricsAsserts {
   }
   
   /**
-   * Asserts that the NumOps and quantiles for a metric have been changed at
-   * some point to a non-zero value.
+   * Asserts that the NumOps and quantiles for a metric with value name
+   * "Latency" have been changed at some point to a non-zero value.
    * 
    * @param prefix of the metric
    * @param rb MetricsRecordBuilder with the metric
    */
-  public static void assertQuantileGauges(String prefix, 
+  public static void assertQuantileGauges(String prefix,
       MetricsRecordBuilder rb) {
+    assertQuantileGauges(prefix, rb, "Latency");
+  }
+
+  /**
+   * Asserts that the NumOps and quantiles for a metric have been changed at
+   * some point to a non-zero value, for the specified value name of the
+   * metrics (e.g., "Latency", "Count").
+   *
+   * @param prefix of the metric
+   * @param rb MetricsRecordBuilder with the metric
+   * @param valueName the value name for the metric
+   */
+  public static void assertQuantileGauges(String prefix,
+      MetricsRecordBuilder rb, String valueName) {
     verify(rb).addGauge(eqName(info(prefix + "NumOps", "")), geq(0l));
     for (Quantile q : MutableQuantiles.quantiles) {
-      String nameTemplate = prefix + "%dthPercentileLatency";
+      String nameTemplate = prefix + "%dthPercentile" + valueName;
       int percentile = (int) (100 * q.quantile);
       verify(rb).addGauge(
           eqName(info(String.format(nameTemplate, percentile), "")),

http://git-wip-us.apache.org/repos/asf/hadoop/blob/8e7548d3/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java
index 73a111e..2003f94 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java
@@ -62,6 +62,7 @@ import static org.apache.hadoop.util.ExitUtil.terminate;
 
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Preconditions;
+import org.apache.hadoop.util.Time;
 
 
 /**
@@ -292,6 +293,7 @@ public class EditLogTailer {
         LOG.debug("lastTxnId: " + lastTxnId);
       }
       Collection<EditLogInputStream> streams;
+      long startTime = Time.monotonicNow();
       try {
         streams = editLog.selectInputStreams(lastTxnId + 1, 0,
             null, inProgressOk, true);
@@ -302,6 +304,9 @@ public class EditLogTailer {
         LOG.warn("Edits tailer failed to find any streams. Will try again " +
             "later.", ioe);
         return;
+      } finally {
+        NameNode.getNameNodeMetrics().addEditLogFetchTime(
+            Time.monotonicNow() - startTime);
       }
       if (LOG.isDebugEnabled()) {
         LOG.debug("edit streams to load from: " + streams.size());
@@ -322,6 +327,7 @@ public class EditLogTailer {
           LOG.debug(String.format("Loaded %d edits starting from txid %d ",
               editsLoaded, lastTxnId));
         }
+        NameNode.getNameNodeMetrics().addNumEditLogLoaded(editsLoaded);
       }
 
       if (editsLoaded > 0) {
@@ -446,10 +452,15 @@ public class EditLogTailer {
           // name system lock will be acquired to further block even the block
           // state updates.
           namesystem.cpLockInterruptibly();
+          long startTime = Time.monotonicNow();
           try {
+            NameNode.getNameNodeMetrics().addEditLogTailInterval(
+                startTime - lastLoadTimeMs);
             doTailEdits();
           } finally {
             namesystem.cpUnlock();
+            NameNode.getNameNodeMetrics().addEditLogTailTime(
+                Time.monotonicNow() - startTime);
           }
           //Update NameDirSize Metric
           namesystem.getFSImage().getStorage().updateNameDirSize();

http://git-wip-us.apache.org/repos/asf/hadoop/blob/8e7548d3/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/NameNodeMetrics.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/NameNodeMetrics.java
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/NameNodeMetrics.java
index 94c5e9e..5d43ebf 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/NameNodeMetrics.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/NameNodeMetrics.java
@@ -32,6 +32,7 @@ import org.apache.hadoop.metrics2.lib.MutableCounterLong;
 import org.apache.hadoop.metrics2.lib.MutableGaugeInt;
 import org.apache.hadoop.metrics2.lib.MutableQuantiles;
 import org.apache.hadoop.metrics2.lib.MutableRate;
+import org.apache.hadoop.metrics2.lib.MutableStat;
 import org.apache.hadoop.metrics2.source.JvmMetrics;
 
 /**
@@ -134,6 +135,18 @@ public class NameNodeMetrics {
   @Metric("Time loading FS Image at startup in msec")
   MutableGaugeInt fsImageLoadTime;
 
+  @Metric("Time tailing edit logs in msec")
+  MutableRate editLogTailTime;
+  private final MutableQuantiles[] editLogTailTimeQuantiles;
+  @Metric MutableRate editLogFetchTime;
+  private final MutableQuantiles[] editLogFetchTimeQuantiles;
+  @Metric(value = "Number of edits loaded", valueName = "Count")
+  MutableStat numEditLogLoaded;
+  private final MutableQuantiles[] numEditLogLoadedQuantiles;
+  @Metric("Time between edit log tailing in msec")
+  MutableRate editLogTailInterval;
+  private final MutableQuantiles[] editLogTailIntervalQuantiles;
+
   @Metric("GetImageServlet getEdit")
   MutableRate getEdit;
   @Metric("GetImageServlet getImage")
@@ -156,7 +169,11 @@ public class NameNodeMetrics {
     generateEDEKTimeQuantiles = new MutableQuantiles[len];
     warmUpEDEKTimeQuantiles = new MutableQuantiles[len];
     resourceCheckTimeQuantiles = new MutableQuantiles[len];
-    
+    editLogTailTimeQuantiles = new MutableQuantiles[len];
+    editLogFetchTimeQuantiles = new MutableQuantiles[len];
+    numEditLogLoadedQuantiles = new MutableQuantiles[len];
+    editLogTailIntervalQuantiles = new MutableQuantiles[len];
+
     for (int i = 0; i < len; i++) {
       int interval = intervals[i];
       syncsQuantiles[i] = registry.newQuantiles(
@@ -181,6 +198,18 @@ public class NameNodeMetrics {
       resourceCheckTimeQuantiles[i] = registry.newQuantiles(
           "resourceCheckTime" + interval + "s",
           "resource check time", "ops", "latency", interval);
+      editLogTailTimeQuantiles[i] = registry.newQuantiles(
+          "editLogTailTime" + interval + "s",
+          "Edit log tailing time", "ops", "latency", interval);
+      editLogFetchTimeQuantiles[i] = registry.newQuantiles(
+          "editLogFetchTime" + interval + "s",
+          "Edit log fetch time", "ops", "latency", interval);
+      numEditLogLoadedQuantiles[i] = registry.newQuantiles(
+          "numEditLogLoaded" + interval + "s",
+          "Number of edits loaded", "ops", "count", interval);
+      editLogTailIntervalQuantiles[i] = registry.newQuantiles(
+          "editLogTailInterval" + interval + "s",
+          "Edit log tailing interval", "ops", "latency", interval);
     }
   }
 
@@ -389,4 +418,32 @@ public class NameNodeMetrics {
       q.add(latency);
     }
   }
+
+  public void addEditLogTailTime(long elapsed) {
+    editLogTailTime.add(elapsed);
+    for (MutableQuantiles q : editLogTailTimeQuantiles) {
+      q.add(elapsed);
+    }
+  }
+
+  public void addEditLogFetchTime(long elapsed) {
+    editLogFetchTime.add(elapsed);
+    for (MutableQuantiles q : editLogFetchTimeQuantiles) {
+      q.add(elapsed);
+    }
+  }
+
+  public void addNumEditLogLoaded(long loaded) {
+    numEditLogLoaded.add(loaded);
+    for (MutableQuantiles q : numEditLogLoadedQuantiles) {
+      q.add(loaded);
+    }
+  }
+
+  public void addEditLogTailInterval(long elapsed) {
+    editLogTailInterval.add(elapsed);
+    for (MutableQuantiles q : editLogTailIntervalQuantiles) {
+      q.add(elapsed);
+    }
+  }
 }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/8e7548d3/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java
index 92bc51c..e34deea 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java
@@ -32,6 +32,7 @@ import org.apache.hadoop.hdfs.client.HdfsAdmin;
 import static org.apache.hadoop.fs.CommonConfigurationKeys.HA_HM_RPC_TIMEOUT_DEFAULT;
 import static org.apache.hadoop.fs.CommonConfigurationKeys.HA_HM_RPC_TIMEOUT_KEY;
 import static org.apache.hadoop.test.MetricsAsserts.assertCounter;
+import static org.apache.hadoop.test.MetricsAsserts.assertCounterGt;
 import static org.apache.hadoop.test.MetricsAsserts.assertGauge;
 import static org.apache.hadoop.test.MetricsAsserts.assertQuantileGauges;
 import static org.apache.hadoop.test.MetricsAsserts.getMetrics;
@@ -1059,4 +1060,44 @@ public class TestNameNodeMetrics {
       }
     }
   }
+
+  @Test
+  public void testEditLogTailing() throws Exception {
+    HdfsConfiguration conf = new HdfsConfiguration();
+    conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
+    conf.setInt(DFSConfigKeys.DFS_METRICS_PERCENTILES_INTERVALS_KEY, 60);
+    MiniDFSCluster dfsCluster = null;
+    try {
+      dfsCluster = new MiniDFSCluster.Builder(conf)
+          .numDataNodes(0)
+          .nnTopology(MiniDFSNNTopology.simpleHATopology())
+          .build();
+      DistributedFileSystem dfs = dfsCluster.getFileSystem(0);
+      dfsCluster.transitionToActive(0);
+      dfsCluster.waitActive();
+
+      Path testDir = new Path("/testdir");
+      dfs.mkdir(testDir, FsPermission.getDefault());
+
+      dfsCluster.getNameNodeRpc(0).rollEditLog();
+      Thread.sleep(2 * 1000);
+
+      // We need to get the metrics for the SBN (excluding the NN from dfs
+      // cluster created in setUp() and the ANN).
+      MetricsRecordBuilder rb = getMetrics(NN_METRICS+"-2");
+      assertQuantileGauges("EditLogTailTime60s", rb);
+      assertQuantileGauges("EditLogFetchTime60s", rb);
+      assertQuantileGauges("NumEditLogLoaded60s", rb, "Count");
+      assertQuantileGauges("EditLogTailInterval60s", rb);
+      assertCounterGt("EditLogTailTimeNumOps", 0L, rb);
+      assertCounterGt("EditLogFetchTimeNumOps", 0L, rb);
+      assertCounterGt("NumEditLogLoadedNumOps", 0L, rb);
+      assertCounterGt("EditLogTailIntervalNumOps", 0L, rb);
+    } finally {
+      if (dfsCluster != null) {
+        dfsCluster.shutdown();
+      }
+    }
+
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org


Mime
View raw message