mesos-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ji...@apache.org
Subject [1/2] mesos git commit: Fixed containerizer potential race destroy while provisioning.
Date Wed, 23 Mar 2016 22:38:47 GMT
Repository: mesos
Updated Branches:
  refs/heads/master 6be07a8de -> 8d3d47073


Fixed containerizer potential race destroy while provisioning.

Review: https://reviews.apache.org/r/45092/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/cb2298eb
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/cb2298eb
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/cb2298eb

Branch: refs/heads/master
Commit: cb2298ebe7d7dd9ed3c7816011f461848a9d52fa
Parents: 6be07a8
Author: Gilbert Song <songzihao1990@gmail.com>
Authored: Wed Mar 23 15:29:26 2016 -0700
Committer: Jie Yu <yujie.jay@gmail.com>
Committed: Wed Mar 23 15:37:46 2016 -0700

----------------------------------------------------------------------
 src/slave/containerizer/mesos/containerizer.cpp | 56 ++++++++++++++++----
 src/slave/containerizer/mesos/containerizer.hpp |  6 +++
 .../containerizer/mesos_containerizer_tests.cpp |  4 +-
 3 files changed, 54 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/cb2298eb/src/slave/containerizer/mesos/containerizer.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/containerizer.cpp b/src/slave/containerizer/mesos/containerizer.cpp
index ee7a265..e7f7e7f 100644
--- a/src/slave/containerizer/mesos/containerizer.cpp
+++ b/src/slave/containerizer/mesos/containerizer.cpp
@@ -669,13 +669,9 @@ Future<bool> MesosContainerizerProcess::launch(
 
   Container* container = new Container();
   container->directory = directory;
-  container->state = PREPARING;
+  container->state = PROVISIONING;
   container->resources = executorInfo.resources();
 
-  // TODO(lins05): It's possible that we call provisioner->destroy
-  // while provisioner->provision is in progress.  This could cause
-  // leaking of the provisioned directory. See MESOS-4985.
-
   // We need to set the `launchInfos` to be a ready future initially
   // before we starting calling isolator->prepare() because otherwise,
   // the destroy will wait forever trying to wait for this future to
@@ -715,7 +711,12 @@ Future<bool> MesosContainerizerProcess::launch(
 
   const Image& image = executorInfo.container().mesos().image();
 
-  return provisioner->provision(containerId, image)
+  Future<ProvisionInfo> future =
+    provisioner->provision(containerId, image);
+
+  container->provisionInfos.push_back(future);
+
+  return future
     .then(defer(PID<MesosContainerizerProcess>(this),
                 &MesosContainerizerProcess::_launch,
                 containerId,
@@ -744,6 +745,19 @@ Future<bool> MesosContainerizerProcess::_launch(
   CHECK(executorInfo.has_container());
   CHECK_EQ(executorInfo.container().type(), ContainerInfo::MESOS);
 
+  // This is because even if a 'destroy' happens after 'launch' and
+  // before '_launch', the 'destroy' will wait for the 'provision' in
+  // 'launch' to finish. Since we register the '_launch' callback
+  // first, it is guaranteed to be called before '__destroy'.
+  CHECK(containers_.contains(containerId));
+
+  // Make sure containerizer is not in DESTROYING state, to avoid
+  // a possible race that containerizer is destroying the container
+  // while it is provisioning the image from volumes.
+  if (containers_[containerId]->state == DESTROYING) {
+    return Failure("Container is currently being destroyed");
+  }
+
   // We will provision the images specified in ContainerInfo::volumes
   // as well. We will mutate ContainerInfo::volumes to include the
   // paths to the provisioned root filesystems (by setting the
@@ -768,7 +782,10 @@ Future<bool> MesosContainerizerProcess::_launch(
 
     const Image& image = volume->image();
 
-    futures.push_back(provisioner->provision(containerId, image)
+    Future<ProvisionInfo> future = provisioner->provision(containerId, image);
+    containers_[containerId]->provisionInfos.push_back(future);
+
+    futures.push_back(future
       .then([=](const ProvisionInfo& info) -> Future<Nothing> {
         volume->set_host_path(info.rootfs);
 
@@ -790,7 +807,7 @@ Future<bool> MesosContainerizerProcess::_launch(
   // We put `prepare` inside of a lambda expression, in order to get
   // _executorInfo object after host path set in volume.
   return collect(futures)
-    .then([=]() -> Future<bool> {
+    .then(defer([=]() -> Future<bool> {
       return prepare(containerId,
                      taskInfo,
                      *_executorInfo,
@@ -807,7 +824,7 @@ Future<bool> MesosContainerizerProcess::_launch(
                     slavePid,
                     checkpoint,
                     lambda::_1));
-    });
+    }));
 }
 
 
@@ -842,6 +859,8 @@ Future<list<Option<ContainerLaunchInfo>>> MesosContainerizerProcess::prepare(
 {
   CHECK(containers_.contains(containerId));
 
+  containers_[containerId]->state = PREPARING;
+
   // Construct ContainerConfig.
   ContainerConfig containerConfig;
   containerConfig.set_directory(directory);
@@ -1401,6 +1420,25 @@ void MesosContainerizerProcess::destroy(
 
   LOG(INFO) << "Destroying container '" << containerId << "'";
 
+  if (container->state == PROVISIONING) {
+    VLOG(1) << "Waiting for the provisioner to complete for container '"
+            << containerId << "'";
+
+    container->state = DESTROYING;
+
+    // Wait for the provisioner to finish provisioning before we
+    // start destroying the container.
+    await(container->provisionInfos)
+      .onAny(defer(
+          self(),
+          &Self::___destroy,
+          containerId,
+          None(),
+          "Container destroyed while provisioning images"));
+
+    return;
+  }
+
   if (container->state == PREPARING) {
     VLOG(1) << "Waiting for the isolators to complete preparing before "
             << "destroying the container";

http://git-wip-us.apache.org/repos/asf/mesos/blob/cb2298eb/src/slave/containerizer/mesos/containerizer.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/containerizer.hpp b/src/slave/containerizer/mesos/containerizer.hpp
index 3ef6a67..a6e3a8b 100644
--- a/src/slave/containerizer/mesos/containerizer.hpp
+++ b/src/slave/containerizer/mesos/containerizer.hpp
@@ -285,6 +285,7 @@ private:
 
   enum State
   {
+    PROVISIONING,
     PREPARING,
     ISOLATING,
     FETCHING,
@@ -302,6 +303,11 @@ private:
     // the executor exits.
     process::Future<Option<int>> status;
 
+    // We keep track of the future that is waiting for the provisioner's
+    // `ProvisionInfo`, so that destroy will only start calling
+    // provisioner->destroy after provisioner->provision has finished.
+    std::list<process::Future<ProvisionInfo>> provisionInfos;
+
     // We keep track of the future that is waiting for all the
     // isolators' prepare futures, so that destroy will only start
     // calling cleanup after all isolators has finished preparing.

http://git-wip-us.apache.org/repos/asf/mesos/blob/cb2298eb/src/tests/containerizer/mesos_containerizer_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/containerizer/mesos_containerizer_tests.cpp b/src/tests/containerizer/mesos_containerizer_tests.cpp
index f3ca32b..f7f10da 100644
--- a/src/tests/containerizer/mesos_containerizer_tests.cpp
+++ b/src/tests/containerizer/mesos_containerizer_tests.cpp
@@ -809,10 +809,8 @@ TEST_F(MesosContainerizerProvisionerTest, ProvisionFailed)
 
   containerizer::Termination termination = wait.get();
 
-  // TODO(lins05): Improve the assertion once we add the
-  // "PROVISIONING" state. See MESOS-4985.
   EXPECT_EQ(
-    "Container destroyed while preparing isolators",
+    "Container destroyed while provisioning images",
     termination.message());
 
   EXPECT_FALSE(termination.has_status());


Mime
View raw message