Return-Path: X-Original-To: apmail-aurora-commits-archive@minotaur.apache.org Delivered-To: apmail-aurora-commits-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 71A5917F0E for ; Mon, 1 Jun 2015 22:20:39 +0000 (UTC) Received: (qmail 73854 invoked by uid 500); 1 Jun 2015 22:20:39 -0000 Delivered-To: apmail-aurora-commits-archive@aurora.apache.org Received: (qmail 73812 invoked by uid 500); 1 Jun 2015 22:20:39 -0000 Mailing-List: contact commits-help@aurora.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@aurora.apache.org Delivered-To: mailing list commits@aurora.apache.org Received: (qmail 73798 invoked by uid 99); 1 Jun 2015 22:20:39 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 01 Jun 2015 22:20:39 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id D3728E056A; Mon, 1 Jun 2015 22:20:38 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: wickman@apache.org To: commits@aurora.apache.org Message-Id: <82ce4f571ff6407bb7bc882427e5b335@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: aurora git commit: Daemonize all deadline calls in aurora executor. Date: Mon, 1 Jun 2015 22:20:38 +0000 (UTC) Repository: aurora Updated Branches: refs/heads/master 827b9abea -> 73ceeb22a Daemonize all deadline calls in aurora executor. If we do not daemonize, it's possible for the aurora executor to send TASK_KILLED and then block indefinitely on shutdown. This way the aurora executor process will at least exit, allowing the cgroup to tear down all active processes. 
Testing Done: ./pants test src/test/python/apache/aurora/executor:: Bugs closed: AURORA-698 Reviewed at https://reviews.apache.org/r/34484/ Project: http://git-wip-us.apache.org/repos/asf/aurora/repo Commit: http://git-wip-us.apache.org/repos/asf/aurora/commit/73ceeb22 Tree: http://git-wip-us.apache.org/repos/asf/aurora/tree/73ceeb22 Diff: http://git-wip-us.apache.org/repos/asf/aurora/diff/73ceeb22 Branch: refs/heads/master Commit: 73ceeb22a18e4b3df3bffb04cf7d58527066fb5a Parents: 827b9ab Author: Brian Wickman Authored: Mon Jun 1 15:20:25 2015 -0700 Committer: Brian Wickman Committed: Mon Jun 1 15:20:25 2015 -0700 ---------------------------------------------------------------------- .../apache/aurora/executor/aurora_executor.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/aurora/blob/73ceeb22/src/main/python/apache/aurora/executor/aurora_executor.py ---------------------------------------------------------------------- diff --git a/src/main/python/apache/aurora/executor/aurora_executor.py b/src/main/python/apache/aurora/executor/aurora_executor.py index df0df0c..7ad179e 100644 --- a/src/main/python/apache/aurora/executor/aurora_executor.py +++ b/src/main/python/apache/aurora/executor/aurora_executor.py @@ -31,6 +31,10 @@ from .executor_base import ExecutorBase from .status_manager import StatusManager +def propagate_deadline(*args, **kw): + return deadline(*args, daemon=True, propagate=True, **kw) + + class AuroraExecutor(ExecutorBase, Observable): PERSISTENCE_WAIT = Amount(5, Time.SECONDS) SANDBOX_INITIALIZATION_TIMEOUT = Amount(10, Time.MINUTES) @@ -118,8 +122,7 @@ class AuroraExecutor(ExecutorBase, Observable): self._sandbox = self._sandbox_provider.from_assigned_task(assigned_task) self.sandbox_initialized.set() try: - deadline(self._sandbox.create, timeout=self.SANDBOX_INITIALIZATION_TIMEOUT, - daemon=True, propagate=True) 
+ propagate_deadline(self._sandbox.create, timeout=self.SANDBOX_INITIALIZATION_TIMEOUT) except Timeout: self._die(driver, mesos_pb2.TASK_FAILED, 'Timed out waiting for sandbox to initialize!') return @@ -134,7 +137,7 @@ class AuroraExecutor(ExecutorBase, Observable): self._die(driver, mesos_pb2.TASK_KILLED, 'Task killed during initialization.') try: - deadline(self._runner.start, timeout=self.START_TIMEOUT, propagate=True) + propagate_deadline(self._runner.start, timeout=self.START_TIMEOUT) except TaskError as e: self._die(driver, mesos_pb2.TASK_FAILED, 'Task initialization failed: %s' % e) return False @@ -186,14 +189,20 @@ class AuroraExecutor(ExecutorBase, Observable): runner_status = self._runner.status try: - deadline(self._chained_checker.stop, timeout=self.STOP_TIMEOUT) + propagate_deadline(self._chained_checker.stop, timeout=self.STOP_TIMEOUT) except Timeout: log.error('Failed to stop all checkers within deadline.') + except Exception: + log.error('Failed to stop health checkers:') + log.error(traceback.format_exc()) try: - deadline(self._runner.stop, timeout=self.STOP_TIMEOUT) + propagate_deadline(self._runner.stop, timeout=self.STOP_TIMEOUT) except Timeout: log.error('Failed to stop runner within deadline.') + except Exception: + log.error('Failed to stop runner:') + log.error(traceback.format_exc()) # If the runner was alive when _shutdown was called, defer to the status_result, # otherwise the runner's terminal state is the preferred state.