cassandra-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From brandonwilli...@apache.org
Subject [2/4] cassandra git commit: Failure detector detects and ignores local pauses
Date Tue, 12 May 2015 23:40:30 GMT
Failure detector detects and ignores local pauses

Patch by brandonwilliams, reviewed by Richard Low for CASSANDRA-9183


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/4012134f
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/4012134f
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/4012134f

Branch: refs/heads/trunk
Commit: 4012134f6e03b61bbf5ce64cc456cf7592675d67
Parents: 75e5b3b
Author: Brandon Williams <brandonwilliams@apache.org>
Authored: Tue May 12 18:38:48 2015 -0500
Committer: Brandon Williams <brandonwilliams@apache.org>
Committed: Tue May 12 18:38:48 2015 -0500

----------------------------------------------------------------------
 CHANGES.txt                                     |  1 +
 .../apache/cassandra/gms/FailureDetector.java   | 29 ++++++++++++++++++++
 2 files changed, 30 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cassandra/blob/4012134f/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 1643f9c..7cb0dfd 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
 2.1.6
+ * Failure detector detects and ignores local pauses (CASSANDRA-9183)
  * Add utility class to support for rate limiting a given log statement (CASSANDRA-9029)
  * Add missing consistency levels to cassandra-stess (CASSANDRA-9361)
  * Fix commitlog getCompletedTasks to not increment (CASSANDRA-9339)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/4012134f/src/java/org/apache/cassandra/gms/FailureDetector.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/gms/FailureDetector.java b/src/java/org/apache/cassandra/gms/FailureDetector.java
index 0c40ae3..322aae2 100644
--- a/src/java/org/apache/cassandra/gms/FailureDetector.java
+++ b/src/java/org/apache/cassandra/gms/FailureDetector.java
@@ -48,6 +48,22 @@ public class FailureDetector implements IFailureDetector, FailureDetectorMBean
     public static final String MBEAN_NAME = "org.apache.cassandra.net:type=FailureDetector";
     private static final int SAMPLE_SIZE = 1000;
     protected static final long INITIAL_VALUE_NANOS = TimeUnit.NANOSECONDS.convert(getInitialValue(),
TimeUnit.MILLISECONDS);
+    private static final long DEFAULT_MAX_PAUSE = 5000L * 1000000L; // 5 seconds
+    private static final long MAX_LOCAL_PAUSE_IN_NANOS = getMaxLocalPause();
+    private long lastInterpret = System.nanoTime();
+    private boolean wasPaused = false;
+
+    private static long getMaxLocalPause()
+    {
+        if (System.getProperty("cassandra.max_local_pause_in_ms") != null)
+        {
+            long pause = Long.parseLong(System.getProperty("cassandra.max_local_pause_in_ms"));
+            logger.warn("Overriding max local pause time to {}ms", pause);
+            return pause * 1000000L;
+        }
+        else
+            return DEFAULT_MAX_PAUSE;
+    }
 
     public static final IFailureDetector instance = new FailureDetector();
 
@@ -228,6 +244,19 @@ public class FailureDetector implements IFailureDetector, FailureDetectorMBean
             return;
         }
         long now = System.nanoTime();
+        long diff = now - lastInterpret;
+        lastInterpret = now;
+        if (diff > MAX_LOCAL_PAUSE_IN_NANOS)
+        {
+            logger.warn("Not marking nodes down due to local pause of {} > {}", diff,
MAX_LOCAL_PAUSE_IN_NANOS);
+            wasPaused = true;
+            return;
+        }
+        if (wasPaused)
+        {
+            wasPaused = false;
+            return;
+        }
         double phi = hbWnd.phi(now);
         if (logger.isTraceEnabled())
             logger.trace("PHI for " + ep + " : " + phi);


Mime
View raw message