mesos-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jo...@apache.org
Subject [2/2] mesos git commit: Changed ZooKeeper reconnection logic to retry more aggressively.
Date Sun, 31 Jan 2016 01:43:50 GMT
Changed ZooKeeper reconnection logic to retry more aggressively.

The previous implementation of `GroupProcess` tried to establish a
single ZooKeeper connection on startup, but didn't attempt to retry.
ZooKeeper will retry internally, but it only retries by attempting to
reconnect to a list of previously resolved IPs; it doesn't attempt to
re-resolve those IPs to pickup updates to DNS configuration. Because
DNS configuration can be quite dynamic, we now close the current Zk
handle and open a new one if we've seen a successful `zookeeper_init`
but haven't been connected within the ZooKeeper session timeout.

Review: https://reviews.apache.org/r/42988/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/c2d496ed
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/c2d496ed
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/c2d496ed

Branch: refs/heads/master
Commit: c2d496ed430eaf7daee3e57edefa39c25af2aa43
Parents: ef64fc0
Author: Neil Conway <neil.conway@gmail.com>
Authored: Sat Jan 30 20:02:15 2016 -0500
Committer: Joris Van Remoortere <joris.van.remoortere@gmail.com>
Committed: Sat Jan 30 20:41:39 2016 -0500

----------------------------------------------------------------------
 src/tests/group_tests.cpp   | 26 +++++++++++++
 src/zookeeper/group.cpp     | 80 +++++++++++++++++++++++++++-------------
 src/zookeeper/group.hpp     |  8 ++--
 src/zookeeper/zookeeper.cpp | 45 ++++++++++++++++------
 4 files changed, 119 insertions(+), 40 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/c2d496ed/src/tests/group_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/group_tests.cpp b/src/tests/group_tests.cpp
index 7734946..6344fad 100644
--- a/src/tests/group_tests.cpp
+++ b/src/tests/group_tests.cpp
@@ -34,6 +34,7 @@
 using zookeeper::Group;
 using zookeeper::GroupProcess;
 
+using process::Clock;
 using process::Future;
 
 using std::string;
@@ -433,6 +434,31 @@ TEST_F(GroupTest, LabelledGroup)
   ASSERT_TRUE(membership.get().cancelled().get());
 }
 
+
+// This test checks that the `expired` event is invoked even if we
+// have not ever established a connection to ZooKeeper (MESOS-4546).
+TEST_F(GroupTest, ConnectTimer)
+{
+  const Duration sessionTimeout = Seconds(10);
+
+  Clock::pause();
+
+  // Ensure that we won't be able to establish a connection to ZooKeeper.
+  server->shutdownNetwork();
+
+  Future<Nothing> expired = FUTURE_DISPATCH(_, &GroupProcess::expired);
+
+  Group group(server->connectString(), sessionTimeout, "/test/");
+
+  // Advance the clock to ensure that we forcibly expire the current
+  // ZooKeeper connection attempt and try to reconnect.
+  Clock::advance(sessionTimeout);
+
+  AWAIT_READY(expired);
+
+  Clock::resume();
+}
+
 } // namespace tests {
 } // namespace internal {
 } // namespace mesos {

http://git-wip-us.apache.org/repos/asf/mesos/blob/c2d496ed/src/zookeeper/group.cpp
----------------------------------------------------------------------
diff --git a/src/zookeeper/group.cpp b/src/zookeeper/group.cpp
index add01a3..ded1458 100644
--- a/src/zookeeper/group.cpp
+++ b/src/zookeeper/group.cpp
@@ -125,6 +125,9 @@ GroupProcess::GroupProcess(
 {}
 
 
+// NB: The `retry` and `connect` timers might still be active. However,
+// we don't need to clean them up -- when the timers fire, they will
+// attempt to dispatch to a no-longer-valid PID, which is a no-op.
 GroupProcess::~GroupProcess()
 {
   discard(&pending.joins);
@@ -141,11 +144,28 @@ void GroupProcess::initialize()
 {
   // Doing initialization here allows to avoid the race between
   // instantiating the ZooKeeper instance and being spawned ourself.
+  startConnection();
+}
+
+
+void GroupProcess::startConnection()
+{
   watcher = new ProcessWatcher<GroupProcess>(self());
   zk = new ZooKeeper(servers, sessionTimeout, watcher);
   state = CONNECTING;
-}
 
+  // If the connection is not established within the session timeout,
+  // close the ZooKeeper handle and create a new one. This is
+  // important because the ZooKeeper 3.4 client libraries don't try to
+  // re-resolve the list of hostnames, so we create a new ZooKeeper
+  // handle to ensure we observe DNS changes. See MESOS-4546 and
+  // `ZooKeeperProcess::initialize` for more information.
+  CHECK_NONE(connectTimer);
+  connectTimer = delay(zk->getSessionTimeout(),
+                       self(),
+                       &Self::timedout,
+                       zk->getSessionId());
+}
 
 Future<Group::Membership> GroupProcess::join(
     const string& data,
@@ -346,11 +366,17 @@ void GroupProcess::connected(int64_t sessionId, bool reconnect)
       << state;
   }
 
-  // Cancel and cleanup the reconnect timer (if necessary).
-  if (timer.isSome()) {
-    Clock::cancel(timer.get());
-    timer = None();
-  }
+  // Cancel and cleanup the connect timer. The timer should always be
+  // set, because it is set before making the initial connection
+  // attempt and whenever a reconnection attempt is made.
+  CHECK_SOME(connectTimer);
+
+  // Now that we are connected, we'll learn about a subsequent
+  // disconnection event via the `reconnecting` callback. At that
+  // point we'll also restart the `connectTimer` to ensure we retry
+  // the reconnection attempt.
+  Clock::cancel(connectTimer.get());
+  connectTimer = None();
 
   // Sync group operations (and set up the group on ZK).
   Try<bool> synced = sync();
@@ -446,13 +472,18 @@ void GroupProcess::reconnecting(int64_t sessionId)
   // we create a local timer and "expire" our session prematurely if
   // we haven't reconnected within the session expiration time out.
   // The timer can be reset if the connection is restored.
-  CHECK_NONE(timer);
 
-  // Use the negotiated session timeout for the reconnect timer.
-  timer = delay(zk->getSessionTimeout(),
-                self(),
-                &Self::timedout,
-                zk->getSessionId());
+  // We expect to see exactly one `reconnecting` event when our
+  // session is disconnected, even if we're disconnected for an
+  // extended period. Since we clear the `connectTimer` when the
+  // connection is established, it should still be unset here.
+  CHECK_NONE(connectTimer);
+
+  // Use the negotiated session timeout for the connect timer.
+  connectTimer = delay(zk->getSessionTimeout(),
+                       self(),
+                       &Self::timedout,
+                       zk->getSessionId());
 }
 
 
@@ -464,13 +495,13 @@ void GroupProcess::timedout(int64_t sessionId)
 
   CHECK_NOTNULL(zk);
 
-  if (timer.isSome() &&
-      timer.get().timeout().expired() &&
+  // The connect timer can be reset or replaced and `zk`
+  // can be replaced since this method was dispatched.
+  if (connectTimer.isSome() &&
+      connectTimer.get().timeout().expired() &&
       zk->getSessionId() == sessionId) {
-    // The timer can be reset or replaced and 'zk' can be replaced
-    // since this method was dispatched.
-    LOG(WARNING) << "Timed out waiting to reconnect to ZooKeeper."
-                 << " Forcing ZooKeeper session "
+    LOG(WARNING) << "Timed out waiting to connect to ZooKeeper. "
+                 << "Forcing ZooKeeper session "
                  << "(sessionId=" << std::hex << sessionId << ")
expiration";
 
     // Locally determine that the current session has expired.
@@ -490,10 +521,10 @@ void GroupProcess::expired(int64_t sessionId)
   // Cancel the retries. Group will sync() after it reconnects to ZK.
   retrying = false;
 
-  // Cancel and cleanup the reconnect timer (if necessary).
-  if (timer.isSome()) {
-    Clock::cancel(timer.get());
-    timer = None();
+  // Cancel and cleanup the connect timer (if necessary).
+  if (connectTimer.isSome()) {
+    Clock::cancel(connectTimer.get());
+    connectTimer = None();
   }
 
   // From the group's local perspective all the memberships are
@@ -529,10 +560,7 @@ void GroupProcess::expired(int64_t sessionId)
 
   delete CHECK_NOTNULL(zk);
   delete CHECK_NOTNULL(watcher);
-  watcher = new ProcessWatcher<GroupProcess>(self());
-  zk = new ZooKeeper(servers, sessionTimeout, watcher);
-
-  state = CONNECTING;
+  startConnection();
 }
 
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/c2d496ed/src/zookeeper/group.hpp
----------------------------------------------------------------------
diff --git a/src/zookeeper/group.hpp b/src/zookeeper/group.hpp
index ed5d0a0..db8a120 100644
--- a/src/zookeeper/group.hpp
+++ b/src/zookeeper/group.hpp
@@ -208,6 +208,8 @@ public:
   void deleted(int64_t sessionId, const std::string& path);
 
 private:
+  void startConnection();
+
   Result<Group::Membership> doJoin(
       const std::string& data,
       const Option<std::string>& label);
@@ -339,9 +341,9 @@ private:
   // cache and 'Some' represents a valid cache.
   Option<std::set<Group::Membership> > memberships;
 
-  // The timer that determines whether we should quit waiting for the
-  // connection to be restored.
-  Option<process::Timer> timer;
+  // A timer that controls when we should give up on waiting for the
+  // current connection attempt to succeed and try to reconnect.
+  Option<process::Timer> connectTimer;
 };
 
 } // namespace zookeeper {

http://git-wip-us.apache.org/repos/asf/mesos/blob/c2d496ed/src/zookeeper/zookeeper.cpp
----------------------------------------------------------------------
diff --git a/src/zookeeper/zookeeper.cpp b/src/zookeeper/zookeeper.cpp
index 790bd15..02fa158 100644
--- a/src/zookeeper/zookeeper.cpp
+++ b/src/zookeeper/zookeeper.cpp
@@ -70,17 +70,40 @@ public:
 
   virtual void initialize()
   {
-    // We retry zookeeper_init until the timeout elapses because we've
-    // seen cases where temporary DNS outages cause the slave to abort
-    // here. See MESOS-1326 for more information.
-    // ZooKeeper masks EAI_AGAIN as EINVAL and a name resolution timeout
-    // may be upwards of 30 seconds. As such, a 10 second timeout is not
-    // enough. Hard code this to 10 minutes to be sure we're trying again
-    // in the face of temporary name resolution failures. See MESOS-1523
-    // for more information.
-    const Timeout timeout_ = Timeout::in(Minutes(10));
-
-    while (!timeout_.expired()) {
+    // There are two different timeouts here:
+    //
+    // (1) `sessionTimeout` is the client's proposed value for the
+    // ZooKeeper session timeout.
+    //
+    // (2) `initLoopTimeout` is how long we are prepared to wait,
+    // calling `zookeeper_init` in a loop, until a call succeeds.
+    //
+    // `sessionTimeout` is used to determine the liveness of our
+    // ZooKeeper session. `initLoopTimeout` determines how long to
+    // retry erroneous calls to `zookeeper_init`, because there are
+    // cases when temporary DNS outages cause `zookeeper_init` to
+    // return failure. ZooKeeper masks EAI_AGAIN as EINVAL and a name
+    // resolution timeout may be upwards of 30 seconds. As such, a 10
+    // second timeout (the default `sessionTimeout`) is not enough. We
+    // hardcode `initLoopTimeout` to 10 minutes ensure we're trying
+    // again in the face of temporary name resolution failures. See
+    // MESOS-1523 for more information.
+    //
+    // Note that there are cases where `zookeeper_init` returns
+    // success but we don't see a subsequent ZooKeeper event
+    // indicating that our connection has been established. A common
+    // cause for this situation is that the ZK hostname list resolves
+    // to unreachable IP addresses. ZooKeeper will continue looping,
+    // trying to connect to the list of IPs but never attempting to
+    // re-resolve the input hostnames. Since DNS may have changed, we
+    // close the ZK handle and create a new handle to ensure that ZK
+    // will try to re-resolve the configured list of hostnames.
+    // However, since we can't easily check if the `connected` ZK
+    // event has been fired for this session yet, we implement this
+    // timeout in `GroupProcess`. See MESOS-4546 for more information.
+    const Timeout initLoopTimeout = Timeout::in(Minutes(10));
+
+    while (!initLoopTimeout.expired()) {
       zh = zookeeper_init(
           servers.c_str(),
           event,


Mime
View raw message