hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From a..@apache.org
Subject hadoop git commit: HDFS-9305. Delayed heartbeat processing causes storm of subsequent heartbeats. (Contributed by Arpit Agarwal)
Date Mon, 26 Oct 2015 22:53:49 GMT
Repository: hadoop
Updated Branches:
  refs/heads/branch-2.7 f2fd011cc -> 653ef52ef


HDFS-9305. Delayed heartbeat processing causes storm of subsequent heartbeats. (Contributed by Arpit Agarwal)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/653ef52e
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/653ef52e
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/653ef52e

Branch: refs/heads/branch-2.7
Commit: 653ef52ef267b5a46642e75ab08bd34ddae5503d
Parents: f2fd011
Author: Arpit Agarwal <arp@apache.org>
Authored: Mon Oct 26 15:45:02 2015 -0700
Committer: Arpit Agarwal <arp@apache.org>
Committed: Mon Oct 26 15:47:37 2015 -0700

----------------------------------------------------------------------
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt     |  3 +++
 .../hdfs/server/datanode/BPServiceActor.java    |  4 ++--
 .../datanode/TestBpServiceActorScheduler.java   | 22 ++++++++++++++++++++
 3 files changed, 27 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/653ef52e/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 0ff7733..50bc0c4 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -75,6 +75,9 @@ Release 2.7.2 - UNRELEASED
     HDFS-9290. DFSClient#callAppend() is not backward compatible for slightly
     older NameNodes. (Tony Wu via kihwal)
 
+    HDFS-9305. Delayed heartbeat processing causes storm of subsequent
+    heartbeats. (Arpit Agarwal)
+
 Release 2.7.1 - 2015-07-06
 
   INCOMPATIBLE CHANGES

http://git-wip-us.apache.org/repos/asf/hadoop/blob/653ef52e/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java
index b497cb6..45b1123 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java
@@ -539,6 +539,7 @@ class BPServiceActor implements Runnable {
   }
   
   HeartbeatResponse sendHeartBeat() throws IOException {
+    scheduler.scheduleNextHeartbeat();
     StorageReport[] reports =
         dn.getFSDataset().getStorageReports(bpos.getBlockPoolId());
     if (LOG.isDebugEnabled()) {
@@ -648,7 +649,6 @@ class BPServiceActor implements Runnable {
           // -- Total capacity
           // -- Bytes remaining
           //
-          scheduler.scheduleNextHeartbeat();
           if (!dn.areHeartbeatsDisabledForTests()) {
             HeartbeatResponse resp = sendHeartBeat();
             assert resp != null;
@@ -1038,7 +1038,7 @@ class BPServiceActor implements Runnable {
 
     long scheduleNextHeartbeat() {
       // Numerical overflow is possible here and is okay.
-      nextHeartbeatTime += heartbeatIntervalMs;
+      nextHeartbeatTime = monotonicNow() + heartbeatIntervalMs;
       return nextHeartbeatTime;
     }
 

http://git-wip-us.apache.org/repos/asf/hadoop/blob/653ef52e/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBpServiceActorScheduler.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBpServiceActorScheduler.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBpServiceActorScheduler.java
index 0d7484c..258a14b 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBpServiceActorScheduler.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBpServiceActorScheduler.java
@@ -144,6 +144,28 @@ public class TestBpServiceActorScheduler {
     }
   }
 
+
+  /**
+   * Regression test for HDFS-9305.
+   * Delayed processing of a heartbeat can cause a subsequent heartbeat
+   * storm.
+   */
+  @Test
+  public void testScheduleDelayedHeartbeat() {
+    for (final long now : getTimestamps()) {
+      Scheduler scheduler = makeMockScheduler(now);
+      scheduler.scheduleNextHeartbeat();
+      assertFalse(scheduler.isHeartbeatDue(now));
+
+      // Simulate a delayed heartbeat e.g. due to slow processing by NN.
+      scheduler.nextHeartbeatTime = now - (HEARTBEAT_INTERVAL_MS * 10);
+      scheduler.scheduleNextHeartbeat();
+
+      // Ensure that the next heartbeat is not due immediately.
+      assertFalse(scheduler.isHeartbeatDue(now));
+    }
+  }
+
   private Scheduler makeMockScheduler(long now) {
     LOG.info("Using now = " + now);
     Scheduler mockScheduler = spy(new Scheduler(HEARTBEAT_INTERVAL_MS, BLOCK_REPORT_INTERVAL_MS));


Mime
View raw message