mesos-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ne...@apache.org
Subject [4/8] mesos git commit: Prevent old Mesos agents from registering or re-registering.
Date Wed, 10 May 2017 04:58:28 GMT
Prevent old Mesos agents from registering or re-registering.

Officially, Mesos 1.0.0 (and newer) masters do not support pre-1.0.0
Mesos agents. However, we previously allowed old agents to register,
which resulted in several master crashes. As a short-term solution, we
fixed those crashes by adding backward compatibility mechanisms into the
master, but that backward compatibility code has made the master logic
more complicated and difficult to understand.

This commit changes the master to ignore registration and
re-registration attempts by Mesos agents running a version older than
Mesos 1.0.0, or whose version string cannot be parsed. Now that this
safety check is in place, master logic can safely assume that all agents
are running at least Mesos 1.0.0, which will allow several
simplifications to be made.

Review: https://reviews.apache.org/r/58709


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/347ad21a
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/347ad21a
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/347ad21a

Branch: refs/heads/1.3.x
Commit: 347ad21aa07a9d1a40befca163689de36f4a0084
Parents: 2ccaf99
Author: Neil Conway <neil.conway@gmail.com>
Authored: Mon Mar 6 10:10:45 2017 -0800
Committer: Neil Conway <neil.conway@gmail.com>
Committed: Tue May 9 21:28:44 2017 -0700

----------------------------------------------------------------------
 src/master/constants.hpp   |  4 ++
 src/master/master.cpp      | 36 +++++++++++++++++
 src/tests/master_tests.cpp | 86 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 126 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/347ad21a/src/master/constants.hpp
----------------------------------------------------------------------
diff --git a/src/master/constants.hpp b/src/master/constants.hpp
index 7edf9f6..725680b 100644
--- a/src/master/constants.hpp
+++ b/src/master/constants.hpp
@@ -21,6 +21,7 @@
 
 #include <stout/bytes.hpp>
 #include <stout/duration.hpp>
+#include <stout/version.hpp>
 
 namespace mesos {
 namespace internal {
@@ -145,6 +146,9 @@ constexpr char READWRITE_HTTP_AUTHENTICATION_REALM[] =
 constexpr char DEFAULT_HTTP_FRAMEWORK_AUTHENTICATION_REALM[] =
   "mesos-master-scheduler";
 
+// Agents older than this version are not allowed to register.
+const Version MINIMUM_AGENT_VERSION = Version(1, 0, 0);
+
 } // namespace master {
 } // namespace internal {
 } // namespace mesos {

http://git-wip-us.apache.org/repos/asf/mesos/blob/347ad21a/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index 31a7a2f..e5005c6 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -5505,6 +5505,24 @@ void Master::_registerSlave(
     return;
   }
 
+  // Ignore registration attempts by agents running old Mesos versions.
+  // We expect that the agent's version is in SemVer format; if the
+  // version cannot be parsed, the registration attempt is ignored.
+  Try<Version> parsedVersion = Version::parse(version);
+
+  if (parsedVersion.isError()) {
+    LOG(WARNING) << "Failed to parse version '" << version << "'"
+                 << " of agent at " << pid << ": " << parsedVersion.error()
+                 << "; ignoring agent registration attempt";
+    return;
+  } else if (parsedVersion.get() < MINIMUM_AGENT_VERSION) {
+    LOG(WARNING) << "Ignoring registration attempt from old agent at "
+                 << pid << ": agent version is " << parsedVersion.get()
+                 << ", minimum supported agent version is "
+                 << MINIMUM_AGENT_VERSION;
+    return;
+  }
+
   // Check if this slave is already registered (because it retries).
   if (Slave* slave = slaves.registered.get(pid)) {
     if (!slave->connected) {
@@ -5775,6 +5793,24 @@ void Master::_reregisterSlave(
     return;
   }
 
+  // Ignore re-registration attempts by agents running old Mesos versions.
+  // We expect that the agent's version is in SemVer format; if the
+  // version cannot be parsed, the re-registration attempt is ignored.
+  Try<Version> parsedVersion = Version::parse(version);
+
+  if (parsedVersion.isError()) {
+    LOG(WARNING) << "Failed to parse version '" << version << "'"
+                 << " of agent at " << pid << ": " << parsedVersion.error()
+                 << "; ignoring agent re-registration attempt";
+    return;
+  } else if (parsedVersion.get() < MINIMUM_AGENT_VERSION) {
+    LOG(WARNING) << "Ignoring re-registration attempt from old agent at "
+                 << pid << ": agent version is " << parsedVersion.get()
+                 << ", minimum supported agent version is "
+                 << MINIMUM_AGENT_VERSION;
+    return;
+  }
+
   if (Slave* slave = slaves.registered.get(slaveInfo.id())) {
     CHECK(!slaves.recovered.contains(slaveInfo.id()));
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/347ad21a/src/tests/master_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/master_tests.cpp b/src/tests/master_tests.cpp
index 7cb4774..ceee2f4 100644
--- a/src/tests/master_tests.cpp
+++ b/src/tests/master_tests.cpp
@@ -7012,6 +7012,92 @@ TEST_F(MasterTest, MultiRoleSchedulerUnsubscribeFromRole)
   driver2.join();
 }
 
+
+// Check that the master does not allow old Mesos agents to register.
+// We do this by intercepting the agent's `RegisterSlaveMessage` and
+// then re-sending it with a tweaked version number.
+TEST_F(MasterTest, IgnoreOldAgentRegistration)
+{
+  Try<Owned<cluster::Master>> master = StartMaster();
+  ASSERT_SOME(master);
+
+  Future<RegisterSlaveMessage> registerSlaveMessage =
+    DROP_PROTOBUF(RegisterSlaveMessage(), _, _);
+
+  Clock::pause();
+
+  slave::Flags slaveFlags = CreateSlaveFlags();
+  Owned<MasterDetector> detector = master.get()->createDetector();
+  Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), slaveFlags);
+  ASSERT_SOME(slave);
+
+  Clock::advance(slaveFlags.authentication_backoff_factor);
+  Clock::advance(slaveFlags.registration_backoff_factor);
+
+  AWAIT_READY(registerSlaveMessage);
+
+  RegisterSlaveMessage message = registerSlaveMessage.get();
+  message.set_version("0.28.1-rc1");
+
+  // The master should ignore the agent's registration attempt. Hence,
+  // we do not expect the master to try to update the registry.
+  EXPECT_CALL(*master.get()->registrar.get(), apply(_))
+    .Times(0);
+
+  process::post(slave.get()->pid, master.get()->pid, message);
+
+  // Settle the clock to retire in-flight messages.
+  Clock::settle();
+}
+
+
+// Check that the master does not allow old Mesos agents to re-register.
+// We do this by intercepting the agent's `ReregisterSlaveMessage` and
+// then re-sending it with a tweaked version number.
+TEST_F(MasterTest, IgnoreOldAgentReregistration)
+{
+  Try<Owned<cluster::Master>> master = StartMaster();
+  ASSERT_SOME(master);
+
+  Future<SlaveRegisteredMessage> slaveRegisteredMessage =
+    FUTURE_PROTOBUF(SlaveRegisteredMessage(), _, _);
+
+  StandaloneMasterDetector detector(master.get()->pid);
+  slave::Flags slaveFlags = CreateSlaveFlags();
+  Try<Owned<cluster::Slave>> slave = StartSlave(&detector, slaveFlags);
+  ASSERT_SOME(slave);
+
+  AWAIT_READY(slaveRegisteredMessage);
+
+  // Intercept the agent's `ReregisterSlaveMessage`.
+  Future<ReregisterSlaveMessage> reregisterSlaveMessage =
+    DROP_PROTOBUF(ReregisterSlaveMessage(), _, _);
+
+  Clock::pause();
+
+  // Simulate a new master detected event on the slave,
+  // so that the slave will attempt to re-register.
+  detector.appoint(master.get()->pid);
+
+  Clock::advance(slaveFlags.authentication_backoff_factor);
+  Clock::advance(slaveFlags.registration_backoff_factor);
+
+  AWAIT_READY(reregisterSlaveMessage);
+
+  ReregisterSlaveMessage message = reregisterSlaveMessage.get();
+  message.set_version("0.28.1-rc1");
+
+  // The master should ignore the agent's re-registration attempt, so
+  // we do not expect the master to try to update the registry.
+  EXPECT_CALL(*master.get()->registrar.get(), apply(_))
+    .Times(0);
+
+  process::post(slave.get()->pid, master.get()->pid, message);
+
+  // Settle the clock to retire in-flight messages.
+  Clock::settle();
+}
+
 } // namespace tests {
 } // namespace internal {
 } // namespace mesos {


Mime
View raw message