mesos-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From gilb...@apache.org
Subject [3/4] mesos git commit: Reverted "Fixed `wait()` and `destroy()` in composing containerizer.".
Date Sat, 13 Jan 2018 01:03:18 GMT
Reverted "Fixed `wait()` and `destroy()` in composing containerizer.".

This reverts commit 95decd404438abd422794524e01d72a889821566.

There are two reasons to revert this commit:
  1. After the agent recovers, if the nested containers that are
     launched beforehand are killed, they will no longer be updated
     with new status, because the `WAIT_NESTED_CONTAINER` call from
     the default executor will end up with a future forever. Please
     see MESOS-8391 for details.
  2. The original commit makes the composing containerizer wait()
     and destroy() rely on the same future of a ContainerTermination
     promise. This would get into the bug that composing containerizer
     destroy() may fail due to the wait() future got discarded.
     Need to protect it by using `undiscardable()`. Please see
     MESOS-7926 for details.

Review: https://reviews.apache.org/r/65139


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/dd6ab9dc
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/dd6ab9dc
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/dd6ab9dc

Branch: refs/heads/master
Commit: dd6ab9dcc7b8b9ceb40528e0879cd4e9eace8132
Parents: b4372ac
Author: Gilbert Song <songzihao1990@gmail.com>
Authored: Thu Jan 11 17:59:37 2018 -0800
Committer: Gilbert Song <songzihao1990@gmail.com>
Committed: Fri Jan 12 17:01:07 2018 -0800

----------------------------------------------------------------------
 src/slave/containerizer/composing.cpp | 89 +++++++++++++++---------------
 1 file changed, 44 insertions(+), 45 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/dd6ab9dc/src/slave/containerizer/composing.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/composing.cpp b/src/slave/containerizer/composing.cpp
index 9ace70d..cd840a5 100644
--- a/src/slave/containerizer/composing.cpp
+++ b/src/slave/containerizer/composing.cpp
@@ -118,12 +118,6 @@ private:
       const ContainerID& containerId,
       Containerizer::LaunchResult launchResult);
 
-  // Last step in the container destruction chain; when it finishes, the
-  // container must be removed from the internal collection.
-  void _destroy(
-      const ContainerID& containerId,
-      const Future<Option<ContainerTermination>>& future);
-
   vector<Containerizer*> containerizers_;
 
   // The states that the composing containerizer cares about for the
@@ -141,7 +135,7 @@ private:
   {
     State state;
     Containerizer* containerizer;
-    Promise<Option<ContainerTermination>> termination;
+    Promise<bool> destroyed;
   };
 
   hashmap<ContainerID, Container*> containers_;
@@ -360,7 +354,7 @@ Future<Containerizer::LaunchResult> ComposingContainerizerProcess::_launch(
       // This is needed for eventually removing the given container from
       // the list of active containers.
       container->containerizer->wait(containerId)
-        .onAny(defer(self(), &Self::_destroy, containerId, lambda::_1));
+        .onAny(defer(self(), &Self::destroy, containerId));
     }
 
     // Note that the return value is not impacted
@@ -376,10 +370,10 @@ Future<Containerizer::LaunchResult> ComposingContainerizerProcess::_launch(
   if (containerizer == containerizers_.end()) {
     // If we are here none of the containerizers support the launch.
 
-    // We set this to `None` because the container has no chance of
+    // We set this to `false` because the container has no chance of
     // getting launched by any containerizer. This is similar to what
     // would happen if the destroy "started" after launch returned false.
-    container->termination.set(Option<ContainerTermination>::none());
+    container->destroyed.set(false);
 
     // We destroy the container irrespective whether
     // a destroy is already in progress, for simplicity.
@@ -395,11 +389,9 @@ Future<Containerizer::LaunchResult> ComposingContainerizerProcess::_launch(
     // potentially launch this container. But since a destroy is in progress
     // we do not try any other containerizers.
 
-    // If the destroy-in-progress stopped an launch-in-progress (using the next
-    // containerizer), then we need to set a value to the `termination` promise,
-    // because we consider `wait()` and `destroy()` operations as successful.
-    container->termination.set(
-        Option<ContainerTermination>(ContainerTermination()));
+    // We set this to `true` because the destroy-in-progress stopped an
+    // launch-in-progress (using the next containerizer).
+    container->destroyed.set(true);
 
     containers_.erase(containerId);
     delete container;
@@ -509,7 +501,7 @@ Future<Containerizer::LaunchResult> ComposingContainerizerProcess::_launch(
       // This is needed for eventually removing the given container from
       // the list of active containers.
       container->containerizer->wait(containerId)
-        .onAny(defer(self(), &Self::_destroy, containerId, lambda::_1));
+        .onAny(defer(self(), &Self::destroy, containerId));
     }
 
     // Note that the return value is not impacted
@@ -519,10 +511,10 @@ Future<Containerizer::LaunchResult> ComposingContainerizerProcess::_launch(
 
   // If we are here, the launch is not supported by the containerizer.
 
-  // We set this to `None` because the container has no chance of
+  // We set this to `false` because the container has no chance of
   // getting launched. This is similar to what would happen if the
   // destroy "started" after launch returned false.
-  container->termination.set(Option<ContainerTermination>::none());
+  container->destroyed.set(false);
 
   // We destroy the container irrespective whether
   // a destroy is already in progress, for simplicity.
@@ -582,10 +574,6 @@ Future<ContainerStatus> ComposingContainerizerProcess::status(
 Future<Option<ContainerTermination>> ComposingContainerizerProcess::wait(
     const ContainerID& containerId)
 {
-  if (containers_.contains(containerId)) {
-    return containers_[containerId]->termination.future();
-  }
-
   // A nested container might have already been terminated, therefore
   // `containers_` might not contain it, but its exit status might have
   // been checkpointed.
@@ -621,16 +609,8 @@ Future<bool> ComposingContainerizerProcess::destroy(
       break; // No-op.
 
     case LAUNCHING:
-    case LAUNCHED:
       container->state = DESTROYING;
 
-      // If the container is destroyed while `launch()` is in progress,
-      // `wait()` will not be called in `_launch()`, so we should call
-      // `wait()` to cleanup state of the container in `_destroy()`.
-      // Note that it is safe to call `_destroy()` multiple times.
-      container->containerizer->wait(containerId)
-        .onAny(defer(self(), &Self::_destroy, containerId, lambda::_1));
-
       // Forward the destroy request to the containerizer. Note that
       // a containerizer is expected to handle a destroy while
       // `launch()` is in progress. If the containerizer could not
@@ -638,27 +618,46 @@ Future<bool> ComposingContainerizerProcess::destroy(
       // then the containerizer may no longer know about this
       // container. If the launch returns false, we will stop trying
       // to launch the container on other containerizers.
-      container->containerizer->destroy(containerId);
+      container->containerizer->destroy(containerId)
+        .onAny(defer(self(), [=](const Future<bool>& destroy) {
+          // We defer the association of the promise in order to
+          // surface a successful destroy (by setting
+          // `Container.destroyed` to true in `_launch()`) when
+          // the containerizer cannot handle this type of container
+          // (`launch()` returns false). If we do not defer here and
+          // instead associate the future right away, the setting of
+          // `Container.destroy` in `_launch()` will be a no-op;
+          // this might result in users waiting on the future
+          // incorrectly thinking that the destroy failed when in
+          // fact the destroy is implicitly successful because the
+          // launch failed.
+          if (containers_.contains(containerId)) {
+            containers_.at(containerId)->destroyed.associate(destroy);
+            delete containers_.at(containerId);
+            containers_.erase(containerId);
+          }
+        }));
 
       break;
-  }
 
-  return container->termination.future()
-    .then([](const Option<ContainerTermination>& termination) {
-      return termination.isSome();
-    });
-}
+    case LAUNCHED:
+      container->state = DESTROYING;
 
+      container->destroyed.associate(
+          container->containerizer->destroy(containerId));
 
-void ComposingContainerizerProcess::_destroy(
-    const ContainerID& containerId,
-    const Future<Option<ContainerTermination>>& future)
-{
-  if (containers_.contains(containerId)) {
-    containers_.at(containerId)->termination.associate(future);
-    delete containers_.at(containerId);
-    containers_.erase(containerId);
+      container->destroyed.future()
+        .onAny(defer(self(), [=](const Future<bool>& destroy) {
+          if (containers_.contains(containerId)) {
+            delete containers_.at(containerId);
+            containers_.erase(containerId);
+          }
+        }));
+
+      break;
   }
+
+  return container->destroyed.future();
 }
 
 


Mime
View raw message