aurora-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From zma...@apache.org
Subject [1/2] aurora git commit: Change job updates to rely on `health-checks` rather than on `watch_secs`.
Date Thu, 17 Nov 2016 21:59:51 GMT
Repository: aurora
Updated Branches:
  refs/heads/master 05f082a1c -> 2992c8b4d


http://git-wip-us.apache.org/repos/asf/aurora/blob/2992c8b4/src/test/python/apache/aurora/executor/test_status_manager.py
----------------------------------------------------------------------
diff --git a/src/test/python/apache/aurora/executor/test_status_manager.py b/src/test/python/apache/aurora/executor/test_status_manager.py
index ce4679b..e677599 100644
--- a/src/test/python/apache/aurora/executor/test_status_manager.py
+++ b/src/test/python/apache/aurora/executor/test_status_manager.py
@@ -18,33 +18,48 @@ from unittest import TestCase
 import mock
 from mesos.interface.mesos_pb2 import TaskState
 
-from apache.aurora.executor.common.status_checker import StatusChecker
+from apache.aurora.executor.common.status_checker import StatusChecker, StatusResult
 from apache.aurora.executor.status_manager import StatusManager
 
 
 class FakeStatusChecker(StatusChecker):
-  def __init__(self):
+  def __init__(self, status):
     self.call_count = 0
+    self._status = status
 
   @property
   def status(self):
     if self.call_count == 2:
-      return TaskState.Value('TASK_KILLED')
+      return StatusResult('Fake reason', TaskState.Value('TASK_KILLED'))
     self.call_count += 1
-    return None
+    return self._status
 
 
 class TestStatusManager(TestCase):
   def setUp(self):
-    self.callback_called = False
+    self.unhealthy_callback_called = False
+    self.running_callback_called = 0
 
-  def test_run(self):
-    checker = FakeStatusChecker()
-    def callback(result):
-      assert result == TaskState.Value('TASK_KILLED')
-      self.callback_called = True
+  def test_run_with_none_status(self):
+    self.do_test_run_with_status(None, 0)
+
+  def test_run_with_starting_status(self):
+    self.do_test_run_with_status(StatusResult(None, TaskState.Value('TASK_STARTING')), 0)
+
+  def test_run_with_running_status(self):
+    self.do_test_run_with_status(StatusResult(None, TaskState.Value('TASK_RUNNING')), 1)
+
+  def do_test_run_with_status(self, status_result, expected_running_callback_call_count):
+    checker = FakeStatusChecker(status_result)
+    def unhealthy_callback(result):
+      assert result == StatusResult('Fake reason', TaskState.Value('TASK_KILLED'))
+      self.unhealthy_callback_called = True
+    def running_callback(result):
+      assert result == StatusResult(None, TaskState.Value('TASK_RUNNING'))
+      self.running_callback_called += 1
     mock_time = mock.create_autospec(spec=time, instance=True)
-    status_manager = StatusManager(checker, callback, mock_time)
+    status_manager = StatusManager(checker, running_callback, unhealthy_callback, mock_time)
     status_manager.run()
     assert mock_time.sleep.call_count == 2
-    assert self.callback_called is True
+    assert self.unhealthy_callback_called is True
+    assert self.running_callback_called == expected_running_callback_call_count

http://git-wip-us.apache.org/repos/asf/aurora/blob/2992c8b4/src/test/python/apache/aurora/executor/test_thermos_executor.py
----------------------------------------------------------------------
diff --git a/src/test/python/apache/aurora/executor/test_thermos_executor.py b/src/test/python/apache/aurora/executor/test_thermos_executor.py
index 3f82165..e628ccd 100644
--- a/src/test/python/apache/aurora/executor/test_thermos_executor.py
+++ b/src/test/python/apache/aurora/executor/test_thermos_executor.py
@@ -217,7 +217,8 @@ def make_executor(
     ports={},
     fast_status=False,
     runner_class=ThermosTaskRunner,
-    status_providers=()):
+    status_providers=[HealthCheckerProvider()],
+    assert_task_is_running=True):
 
   status_manager_class = FastStatusManager if fast_status else StatusManager
   runner_provider = make_provider(checkpoint_root, runner_class)
@@ -242,14 +243,16 @@ def make_executor(
   assert len(updates) == 2
   status_updates = [arg_tuple[0][0] for arg_tuple in updates]
   assert status_updates[0].state == mesos_pb2.TASK_STARTING
-  assert status_updates[1].state == mesos_pb2.TASK_RUNNING
 
-  # wait for the runner to bind to a task
-  while True:
-    runner = TaskRunner.get(task_description.task_id.value, checkpoint_root)
-    if runner:
-      break
-    time.sleep(0.1)
+  runner = None
+  if assert_task_is_running:
+    assert status_updates[1].state == mesos_pb2.TASK_RUNNING
+    # wait for the runner to bind to a task
+    while True:
+      runner = TaskRunner.get(task_description.task_id.value, checkpoint_root)
+      if runner:
+        break
+      time.sleep(0.1)
 
   assert te.launched.is_set()
   return runner, te
@@ -318,7 +321,8 @@ class TestThermosExecutor(object):
     with temporary_dir() as tempdir:
       te = AuroraExecutor(
           runner_provider=make_provider(tempdir),
-          sandbox_provider=DefaultTestSandboxProvider())
+          sandbox_provider=DefaultTestSandboxProvider(),
+          status_providers=[HealthCheckerProvider()])
       te.launchTask(proxy_driver, make_task(HELLO_WORLD_MTI))
       te.terminated.wait()
       tm = TaskMonitor(tempdir, task_id=HELLO_WORLD_TASK_ID)
@@ -340,7 +344,8 @@ class TestThermosExecutor(object):
     with temporary_dir() as tempdir:
       te = AuroraExecutor(
           runner_provider=make_provider(tempdir),
-          sandbox_provider=DefaultTestSandboxProvider())
+          sandbox_provider=DefaultTestSandboxProvider(),
+          status_providers=[HealthCheckerProvider()])
       te.launchTask(proxy_driver, make_task(MESOS_JOB(task=HELLO_WORLD), instanceId=0))
       te.runner_started.wait()
       while te._status_manager is None:
@@ -448,11 +453,12 @@ class TestThermosExecutor(object):
             MESOS_JOB(task=SLEEP60, health_check_config=health_check_config),
             ports={'health': port},
             fast_status=True,
-            status_providers=(HealthCheckerProvider(),))
+            status_providers=(HealthCheckerProvider(),),
+            assert_task_is_running=False)
         executor.terminated.wait()
 
     updates = proxy_driver.method_calls['sendStatusUpdate']
-    assert len(updates) == 3
+    assert len(updates) == 2
     assert updates[-1][0][0].state == mesos_pb2.TASK_FAILED
 
   def test_task_health_ok(self):

http://git-wip-us.apache.org/repos/asf/aurora/blob/2992c8b4/src/test/sh/org/apache/aurora/e2e/http/http_example.aurora
----------------------------------------------------------------------
diff --git a/src/test/sh/org/apache/aurora/e2e/http/http_example.aurora b/src/test/sh/org/apache/aurora/e2e/http/http_example.aurora
index c9dae28..438eb0f 100644
--- a/src/test/sh/org/apache/aurora/e2e/http/http_example.aurora
+++ b/src/test/sh/org/apache/aurora/e2e/http/http_example.aurora
@@ -30,12 +30,12 @@ GpuProfile = DefaultProfile(
 
 echo_ports = Process(
   name = 'echo_ports',
-  cmdline = 'echo "tcp port: {{thermos.ports[tcp]}}; http port: {{thermos.ports[http]}}; alias: {{thermos.ports[alias]}}"'
+  cmdline = 'echo "tcp port: {{thermos.ports[tcp]}}; http port: {{thermos.ports[http]}}; alias: {{thermos.ports[alias]}}; health: {{thermos.ports[health]}}"'
 )
 
 run_server = Process(
   name = 'run_server',
-  cmdline = 'python http_example.py {{thermos.ports[http]}}')
+  cmdline = 'python http_example.py {{thermos.ports[http]}} {{thermos.ports[health]}}')
 
 stage_server = Process(
   name = 'stage_server',
@@ -71,13 +71,15 @@ no_python_task = SequentialTask(
   ]
 )
 
-update_config = UpdateConfig(watch_secs=10, batch_size=2)
+update_config = UpdateConfig(watch_secs=0, batch_size=2)
+update_config_watch_secs = UpdateConfig(watch_secs=10, batch_size=2)
 health_check_config = HealthCheckConfig(initial_interval_secs=5, interval_secs=1)
 shell_health_check_config = HealthCheckConfig(
   health_checker = HealthCheckerConfig(
     shell = ShellHealthChecker(shell_command='stat /usr/local/bin/run-server.sh')),
   initial_interval_secs=5,
   interval_secs=1,
+  min_consecutive_successes=5
 )
 
 job = Service(
@@ -99,12 +101,16 @@ jobs = [
     name = 'http_example'
   ).bind(profile=DefaultProfile()),
   job(
+    name = 'http_example_watch_secs',
+    update_config = update_config_watch_secs
+  ).bind(profile=DefaultProfile()),
+  job(
     name = 'http_example_revocable',
     tier = 'revocable'
   ).bind(profile=DefaultProfile()),
   job(
     name = 'http_example_docker',
-    container = Docker(image='{{docker.image[http_example][latest]}}'),
+    container = Docker(image='{{docker.image[http_example][latest]}}')
   ).bind(profile=ContainerProfile),
   job(
     name = 'http_example_gpu'

http://git-wip-us.apache.org/repos/asf/aurora/blob/2992c8b4/src/test/sh/org/apache/aurora/e2e/http/http_example_bad_healthcheck.aurora
----------------------------------------------------------------------
diff --git a/src/test/sh/org/apache/aurora/e2e/http/http_example_bad_healthcheck.aurora b/src/test/sh/org/apache/aurora/e2e/http/http_example_bad_healthcheck.aurora
index b85ace4..c826a54 100644
--- a/src/test/sh/org/apache/aurora/e2e/http/http_example_bad_healthcheck.aurora
+++ b/src/test/sh/org/apache/aurora/e2e/http/http_example_bad_healthcheck.aurora
@@ -30,7 +30,7 @@ GpuProfile = DefaultProfile(
 
 run_server = Process(
   name = 'run_server',
-  cmdline = 'python http_example.py {{thermos.ports[http]}}'
+  cmdline = 'python http_example.py {{thermos.ports[http]}} {{thermos.ports[health]}}'
 )
 
 stage_server = Process(
@@ -50,7 +50,8 @@ no_python_task = Task(
   resources = Resources(cpu=0.4, ram=32*MB, disk=64*MB),
   processes = [Process(name='run_server', cmdline='run-server.sh {{thermos.ports[http]}}')])
 
-update_config = UpdateConfig(watch_secs=10, batch_size=2)
+update_config = UpdateConfig(watch_secs=0, batch_size=2)
+update_config_watch_secs = UpdateConfig(watch_secs=10, batch_size=2)
 # "I am going to fail" config.
 shell_config = ShellHealthChecker(
   # This shell validates two things:
@@ -62,6 +63,7 @@ health_check_config = HealthCheckConfig(
   health_checker=HealthCheckerConfig(shell=shell_config),
   initial_interval_secs=5,
   interval_secs=1,
+  min_consecutive_successes=5
 )
 
 job = Service(
@@ -81,6 +83,10 @@ jobs = [
     name = 'http_example'
   ).bind(profile=DefaultProfile()),
   job(
+    name = 'http_example_watch_secs',
+    update_config = update_config_watch_secs
+  ).bind(profile=DefaultProfile()),
+  job(
     name = 'http_example_revocable',
     tier = 'revocable'
   ).bind(profile=DefaultProfile()),

http://git-wip-us.apache.org/repos/asf/aurora/blob/2992c8b4/src/test/sh/org/apache/aurora/e2e/http/http_example_updated.aurora
----------------------------------------------------------------------
diff --git a/src/test/sh/org/apache/aurora/e2e/http/http_example_updated.aurora b/src/test/sh/org/apache/aurora/e2e/http/http_example_updated.aurora
index b3caa41..b85907a 100644
--- a/src/test/sh/org/apache/aurora/e2e/http/http_example_updated.aurora
+++ b/src/test/sh/org/apache/aurora/e2e/http/http_example_updated.aurora
@@ -30,7 +30,7 @@ GpuProfile = DefaultProfile(
 
 run_server = Process(
   name = 'run_server',
-  cmdline = 'python http_example.py {{thermos.ports[http]}}')
+  cmdline = 'python http_example.py {{thermos.ports[http]}} {{thermos.ports[health]}}')
 
 stage_server = Process(
   name = 'stage_server',
@@ -47,8 +47,16 @@ no_python_task = Task(
   resources = Resources(cpu=0.4, ram=32*MB, disk=64*MB),
   processes = [Process(name='run_server', cmdline='run-server.sh {{thermos.ports[http]}}')])
 
-update_config = UpdateConfig(watch_secs=10, batch_size=3)
-health_check_config = HealthCheckConfig(initial_interval_secs=5, interval_secs=1)
+update_config = UpdateConfig(watch_secs=0, batch_size=3)
+update_config_watch_secs = UpdateConfig(watch_secs=10, batch_size=3)
+health_check_config = HealthCheckConfig(initial_interval_secs=5, interval_secs=2, min_consecutive_successes=15)
+shell_health_check_config = HealthCheckConfig(
+  health_checker = HealthCheckerConfig(
+    shell = ShellHealthChecker(shell_command='stat /usr/local/bin/run-server.sh')),
+  initial_interval_secs=5,
+  interval_secs=2,
+  min_consecutive_successes=15
+)
 
 job = Service(
   cluster = 'devcluster',
@@ -67,12 +75,16 @@ jobs = [
     name = 'http_example'
   ).bind(profile=DefaultProfile()),
   job(
+    name = 'http_example_watch_secs',
+    update_config = update_config_watch_secs
+  ).bind(profile=DefaultProfile()),
+  job(
     name = 'http_example_revocable',
     tier = 'revocable'
   ).bind(profile=DefaultProfile()),
   job(
     name = 'http_example_docker',
-    container = Docker(image='{{docker.image[http_example][latest]}}'),
+    container = Docker(image='{{docker.image[http_example][latest]}}')
   ).bind(profile=ContainerProfile),
   job(
     name = 'http_example_gpu'
@@ -80,11 +92,13 @@ jobs = [
   job(
     name = 'http_example_unified_appc',
     container = Mesos(image=AppcImage(name='http_example_netcat', image_id='{{appc_image_id}}')),
-    task = no_python_task
+    task = no_python_task,
+    health_check_config=shell_health_check_config
   ).bind(profile=DefaultProfile()),
   job(
     name = 'http_example_unified_docker',
     container = Mesos(image=DockerImage(name='http_example_netcat', tag='latest')),
-    task = no_python_task
+    task = no_python_task,
+    health_check_config=shell_health_check_config
   ).bind(profile=DefaultProfile())
 ]

http://git-wip-us.apache.org/repos/asf/aurora/blob/2992c8b4/src/test/sh/org/apache/aurora/e2e/http_example.py
----------------------------------------------------------------------
diff --git a/src/test/sh/org/apache/aurora/e2e/http_example.py b/src/test/sh/org/apache/aurora/e2e/http_example.py
index 675ece8..ba7d114 100644
--- a/src/test/sh/org/apache/aurora/e2e/http_example.py
+++ b/src/test/sh/org/apache/aurora/e2e/http_example.py
@@ -14,7 +14,9 @@
 from __future__ import print_function
 
 from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer
+from SocketServer import ThreadingMixIn
 from sys import argv
+from threading import Thread
 
 
 class RequestHandler(BaseHTTPRequestHandler):
@@ -25,6 +27,25 @@ class RequestHandler(BaseHTTPRequestHandler):
     self.wfile.write('Hello!\n')
 
 
-server = HTTPServer(('', int(argv[1])), RequestHandler)
-print('Listening on port %s' % argv[1])
-server.serve_forever()
+class HealthHandler(BaseHTTPRequestHandler):
+  def do_GET(self):
+    if self.path == '/health':
+      self.send_response(200)
+      self.send_header('Content-Type', 'text/plain')
+      self.end_headers()
+      self.wfile.write('ok')
+
+
+def start_server(port, handler_class):
+  server = HTTPServer(('', port), handler_class)
+  print('Listening on port %s' % port)
+  server.serve_forever()
+
+request_thread = Thread(target=start_server, args=[int(argv[1]), RequestHandler])
+health_thread = Thread(target=start_server, args=[int(argv[2]), HealthHandler])
+
+for thread in [request_thread, health_thread]:
+  thread.start()
+
+for thread in [request_thread, health_thread]:
+  thread.join()

http://git-wip-us.apache.org/repos/asf/aurora/blob/2992c8b4/src/test/sh/org/apache/aurora/e2e/test_end_to_end.sh
----------------------------------------------------------------------
diff --git a/src/test/sh/org/apache/aurora/e2e/test_end_to_end.sh b/src/test/sh/org/apache/aurora/e2e/test_end_to_end.sh
index 9cc6cec..4630937 100755
--- a/src/test/sh/org/apache/aurora/e2e/test_end_to_end.sh
+++ b/src/test/sh/org/apache/aurora/e2e/test_end_to_end.sh
@@ -33,7 +33,7 @@ _curl() { curl --silent --fail --retry 4 --retry-delay 10 "$@" ; }
 tear_down() {
   set +x  # Disable command echo, as this makes it more difficult see which command failed.
 
-  for job in http_example http_example_revocable http_example_docker http_example_unified_appc http_example_unified_docker; do
+  for job in http_example http_example_watch_secs http_example_revocable http_example_docker http_example_unified_appc http_example_unified_docker; do
     aurora update abort devcluster/vagrant/test/$job || true >/dev/null 2>&1
     aurora job killall --no-batching devcluster/vagrant/test/$job >/dev/null 2>&1
   done
@@ -496,6 +496,7 @@ TEST_CLUSTER=devcluster
 TEST_ROLE=vagrant
 TEST_ENV=test
 TEST_JOB=http_example
+TEST_JOB_WATCH_SECS=http_example_watch_secs
 TEST_JOB_REVOCABLE=http_example_revocable
 TEST_JOB_GPU=http_example_gpu
 TEST_JOB_DOCKER=http_example_docker
@@ -520,6 +521,8 @@ BASE_ARGS=(
 
 TEST_JOB_ARGS=("${BASE_ARGS[@]}" "$TEST_JOB")
 
+TEST_JOB_WATCH_SECS_ARGS=("${BASE_ARGS[@]}" "$TEST_JOB_WATCH_SECS")
+
 TEST_JOB_REVOCABLE_ARGS=("${BASE_ARGS[@]}" "$TEST_JOB_REVOCABLE")
 
 TEST_JOB_GPU_ARGS=("${BASE_ARGS[@]}" "$TEST_JOB_GPU")
@@ -552,6 +555,7 @@ setup_docker_registry
 
 test_version
 test_http_example "${TEST_JOB_ARGS[@]}"
+test_http_example "${TEST_JOB_WATCH_SECS_ARGS[@]}"
 test_health_check
 
 test_http_example_basic "${TEST_JOB_REVOCABLE_ARGS[@]}"


Mime
View raw message