aurora-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From wick...@apache.org
Subject incubator-aurora git commit: Stop the announcer and status checkers before starting to kill the runners.
Date Tue, 03 Mar 2015 18:05:37 GMT
Repository: incubator-aurora
Updated Branches:
  refs/heads/master 4cca6a6bd -> 782e3e7fd


Stop the announcer and status checkers before starting to kill the runners.

This allows the task to be removed from the ZK ensemble before it begins
getting killed.  The delay can be significant if the task takes some time to
shut down, and during that time it stops responding to requests.

Testing Done:
We're now running this in our production environments.  Watching ZK, I can
confirm that the nodes are removed before process shutdown begins.  Watching
the executor log also confirms this.

I couldn't observe any other side effects either.

Reviewed at https://reviews.apache.org/r/31423/


Project: http://git-wip-us.apache.org/repos/asf/incubator-aurora/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-aurora/commit/782e3e7f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-aurora/tree/782e3e7f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-aurora/diff/782e3e7f

Branch: refs/heads/master
Commit: 782e3e7fdd2cef15bcc3b6d83a1ada27752cebd2
Parents: 4cca6a6
Author: Steve Niemitz <steve@tellapart.com>
Authored: Tue Mar 3 10:05:27 2015 -0800
Committer: Brian Wickman <wickman@apache.org>
Committed: Tue Mar 3 10:05:27 2015 -0800

----------------------------------------------------------------------
 .../apache/aurora/executor/aurora_executor.py   |  8 +++---
 src/test/python/apache/aurora/executor/BUILD    |  1 +
 .../aurora/executor/test_thermos_executor.py    | 28 ++++++++++++++++++++
 3 files changed, 33 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/782e3e7f/src/main/python/apache/aurora/executor/aurora_executor.py
----------------------------------------------------------------------
diff --git a/src/main/python/apache/aurora/executor/aurora_executor.py b/src/main/python/apache/aurora/executor/aurora_executor.py
index 9c02823..df0df0c 100644
--- a/src/main/python/apache/aurora/executor/aurora_executor.py
+++ b/src/main/python/apache/aurora/executor/aurora_executor.py
@@ -186,14 +186,14 @@ class AuroraExecutor(ExecutorBase, Observable):
     runner_status = self._runner.status
 
     try:
-      deadline(self._runner.stop, timeout=self.STOP_TIMEOUT)
+      deadline(self._chained_checker.stop, timeout=self.STOP_TIMEOUT)
     except Timeout:
-      log.error('Failed to stop runner within deadline.')
+      log.error('Failed to stop all checkers within deadline.')
 
     try:
-      deadline(self._chained_checker.stop, timeout=self.STOP_TIMEOUT)
+      deadline(self._runner.stop, timeout=self.STOP_TIMEOUT)
     except Timeout:
-      log.error('Failed to stop all checkers within deadline.')
+      log.error('Failed to stop runner within deadline.')
 
     # If the runner was alive when _shutdown was called, defer to the status_result,
     # otherwise the runner's terminal state is the preferred state.

http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/782e3e7f/src/test/python/apache/aurora/executor/BUILD
----------------------------------------------------------------------
diff --git a/src/test/python/apache/aurora/executor/BUILD b/src/test/python/apache/aurora/executor/BUILD
index 2ee9b12..b8dd28c 100644
--- a/src/test/python/apache/aurora/executor/BUILD
+++ b/src/test/python/apache/aurora/executor/BUILD
@@ -62,6 +62,7 @@ python_tests(name = 'thermos_executor',
   sources = ['test_thermos_executor.py'],
 #  timeout = Amount(5, Time.MINUTES),
   dependencies = [
+    '3rdparty/python:mock',
     '3rdparty/python:twitter.common.app',
     '3rdparty/python:twitter.common.exceptions',
     '3rdparty/python:twitter.common.quantity',

http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/782e3e7f/src/test/python/apache/aurora/executor/test_thermos_executor.py
----------------------------------------------------------------------
diff --git a/src/test/python/apache/aurora/executor/test_thermos_executor.py b/src/test/python/apache/aurora/executor/test_thermos_executor.py
index 8dbfb1d..6cc928e 100644
--- a/src/test/python/apache/aurora/executor/test_thermos_executor.py
+++ b/src/test/python/apache/aurora/executor/test_thermos_executor.py
@@ -12,6 +12,7 @@
 # limitations under the License.
 #
 
+import contextlib
 import getpass
 import os
 import signal
@@ -22,6 +23,7 @@ import time
 from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer
 from collections import defaultdict
 
+import mock
 from mesos.interface import mesos_pb2
 from thrift.TSerialization import serialize
 from twitter.common import log
@@ -44,6 +46,7 @@ from apache.aurora.executor.aurora_executor import AuroraExecutor
 from apache.aurora.executor.common.executor_timeout import ExecutorTimeout
 from apache.aurora.executor.common.health_checker import HealthCheckerProvider
 from apache.aurora.executor.common.sandbox import DirectorySandbox, SandboxProvider
+from apache.aurora.executor.common.status_checker import ChainedStatusChecker
 from apache.aurora.executor.common.task_runner import TaskError
 from apache.aurora.executor.status_manager import StatusManager
 from apache.aurora.executor.thermos_task_runner import (
@@ -376,6 +379,31 @@ class TestThermosExecutor(object):
     assert len(updates) == 3
     assert updates[-1][0][0].state == mesos_pb2.TASK_KILLED
 
+  def test_shutdown_order(self):
+    proxy_driver = ProxyDriver()
+
+    with contextlib.nested(
+        temporary_dir(),
+        mock.patch.object(ChainedStatusChecker, 'stop'),
+        mock.patch.object(ThermosTaskRunner, 'stop')) as (
+            checkpoint_root,
+            status_check_stop,
+            runner_stop):
+
+      parent = mock.MagicMock()
+      parent.attach_mock(status_check_stop, 'status_check_stop')
+      parent.attach_mock(runner_stop, 'runner_stop')
+
+      _, executor = make_executor(proxy_driver,
+          checkpoint_root,
+          SLEEP60_MTI)
+      executor.shutdown(proxy_driver)
+      executor.terminated.wait()
+
+      parent.assert_has_calls(
+          [mock.call.status_check_stop(), mock.call.runner_stop()],
+          any_order=False)
+
   def test_task_health_failed(self):
     proxy_driver = ProxyDriver()
     with SignalServer(UnhealthyHandler) as port:


Mime
View raw message