aurora-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From wick...@apache.org
Subject aurora git commit: Daemonize all deadline calls in aurora executor.
Date Mon, 01 Jun 2015 22:20:38 GMT
Repository: aurora
Updated Branches:
  refs/heads/master 827b9abea -> 73ceeb22a


Daemonize all deadline calls in aurora executor.

If we do not daemonize, it's possible for the aurora executor to send
TASK_KILLED and then block indefinitely on shutdown.  This way the aurora
executor process will at least exit, allowing the cgroup to tear down all
active processes.

Testing Done:
./pants test src/test/python/apache/aurora/executor::

Bugs closed: AURORA-698

Reviewed at https://reviews.apache.org/r/34484/


Project: http://git-wip-us.apache.org/repos/asf/aurora/repo
Commit: http://git-wip-us.apache.org/repos/asf/aurora/commit/73ceeb22
Tree: http://git-wip-us.apache.org/repos/asf/aurora/tree/73ceeb22
Diff: http://git-wip-us.apache.org/repos/asf/aurora/diff/73ceeb22

Branch: refs/heads/master
Commit: 73ceeb22a18e4b3df3bffb04cf7d58527066fb5a
Parents: 827b9ab
Author: Brian Wickman <wickman@apache.org>
Authored: Mon Jun 1 15:20:25 2015 -0700
Committer: Brian Wickman <wickman@apache.org>
Committed: Mon Jun 1 15:20:25 2015 -0700

----------------------------------------------------------------------
 .../apache/aurora/executor/aurora_executor.py    | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/aurora/blob/73ceeb22/src/main/python/apache/aurora/executor/aurora_executor.py
----------------------------------------------------------------------
diff --git a/src/main/python/apache/aurora/executor/aurora_executor.py b/src/main/python/apache/aurora/executor/aurora_executor.py
index df0df0c..7ad179e 100644
--- a/src/main/python/apache/aurora/executor/aurora_executor.py
+++ b/src/main/python/apache/aurora/executor/aurora_executor.py
@@ -31,6 +31,10 @@ from .executor_base import ExecutorBase
 from .status_manager import StatusManager
 
 
+def propagate_deadline(*args, **kw):
+  return deadline(*args, daemon=True, propagate=True, **kw)
+
+
 class AuroraExecutor(ExecutorBase, Observable):
   PERSISTENCE_WAIT = Amount(5, Time.SECONDS)
   SANDBOX_INITIALIZATION_TIMEOUT = Amount(10, Time.MINUTES)
@@ -118,8 +122,7 @@ class AuroraExecutor(ExecutorBase, Observable):
     self._sandbox = self._sandbox_provider.from_assigned_task(assigned_task)
     self.sandbox_initialized.set()
     try:
-      deadline(self._sandbox.create, timeout=self.SANDBOX_INITIALIZATION_TIMEOUT,
-               daemon=True, propagate=True)
+      propagate_deadline(self._sandbox.create, timeout=self.SANDBOX_INITIALIZATION_TIMEOUT)
     except Timeout:
       self._die(driver, mesos_pb2.TASK_FAILED, 'Timed out waiting for sandbox to initialize!')
       return
@@ -134,7 +137,7 @@ class AuroraExecutor(ExecutorBase, Observable):
       self._die(driver, mesos_pb2.TASK_KILLED, 'Task killed during initialization.')
 
     try:
-      deadline(self._runner.start, timeout=self.START_TIMEOUT, propagate=True)
+      propagate_deadline(self._runner.start, timeout=self.START_TIMEOUT)
     except TaskError as e:
       self._die(driver, mesos_pb2.TASK_FAILED, 'Task initialization failed: %s' % e)
       return False
@@ -186,14 +189,20 @@ class AuroraExecutor(ExecutorBase, Observable):
     runner_status = self._runner.status
 
     try:
-      deadline(self._chained_checker.stop, timeout=self.STOP_TIMEOUT)
+      propagate_deadline(self._chained_checker.stop, timeout=self.STOP_TIMEOUT)
     except Timeout:
       log.error('Failed to stop all checkers within deadline.')
+    except Exception:
+      log.error('Failed to stop health checkers:')
+      log.error(traceback.format_exc())
 
     try:
-      deadline(self._runner.stop, timeout=self.STOP_TIMEOUT)
+      propagate_deadline(self._runner.stop, timeout=self.STOP_TIMEOUT)
     except Timeout:
       log.error('Failed to stop runner within deadline.')
+    except Exception:
+      log.error('Failed to stop runner:')
+      log.error(traceback.format_exc())
 
     # If the runner was alive when _shutdown was called, defer to the status_result,
     # otherwise the runner's terminal state is the preferred state.


Mime
View raw message