aurora-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ma...@apache.org
Subject git commit: Changing maintenance command to drain SLA-compliant hosts in a group.
Date Thu, 26 Jun 2014 20:27:27 GMT
Repository: incubator-aurora
Updated Branches:
  refs/heads/master 0b2502642 -> 06656cc14


Changing maintenance command to drain SLA-compliant hosts in a group.

Bugs closed: AURORA-542

Reviewed at https://reviews.apache.org/r/22842/


Project: http://git-wip-us.apache.org/repos/asf/incubator-aurora/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-aurora/commit/06656cc1
Tree: http://git-wip-us.apache.org/repos/asf/incubator-aurora/tree/06656cc1
Diff: http://git-wip-us.apache.org/repos/asf/incubator-aurora/diff/06656cc1

Branch: refs/heads/master
Commit: 06656cc14644791944c1c90d10eeb9d94c6f1665
Parents: 0b25026
Author: Maxim Khutornenko <maxim@apache.org>
Authored: Thu Jun 26 13:27:02 2014 -0700
Committer: Maxim Khutornenko <maxim@apache.org>
Committed: Thu Jun 26 13:27:02 2014 -0700

----------------------------------------------------------------------
 .../python/apache/aurora/admin/admin_util.py    |   6 +-
 .../apache/aurora/admin/host_maintenance.py     |  58 ++++++--
 .../apache/aurora/client/commands/admin.py      |   3 +-
 .../aurora/client/commands/maintenance.py       |  20 ++-
 .../aurora/admin/test_host_maintenance.py       |  59 ++++++++-
 .../aurora/client/commands/test_admin_sla.py    |   4 +-
 .../aurora/client/commands/test_maintenance.py  | 132 +++++++++++--------
 .../apache/aurora/client/commands/util.py       |  12 +-
 8 files changed, 202 insertions(+), 92 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/06656cc1/src/main/python/apache/aurora/admin/admin_util.py
----------------------------------------------------------------------
diff --git a/src/main/python/apache/aurora/admin/admin_util.py b/src/main/python/apache/aurora/admin/admin_util.py
index d8517e9..04eecb7 100644
--- a/src/main/python/apache/aurora/admin/admin_util.py
+++ b/src/main/python/apache/aurora/admin/admin_util.py
@@ -163,11 +163,12 @@ def format_sla_results(host_groups, unsafe_only=False):
   :type host_groups: list of (defaultdict(list))
   :param unsafe_only: If True, includes only SLA-"unsafe" hosts from the results
   :type unsafe_only: bool
-  :rtype: list of string
+  :rtype: a tuple of: list of output strings, set of hostnames included in output.
   """
   results = []
   include_unsafe_only = lambda d: not d.safe if unsafe_only else True
 
+  hostnames = set()
   for group in host_groups:
     for host, job_details in sorted(group.items()):
       host_details = '\n'.join(
@@ -180,4 +181,5 @@ def format_sla_results(host_groups, unsafe_only=False):
               for d in sorted(job_details) if include_unsafe_only(d)])
       if host_details:
         results.append(host_details)
-  return results
+        hostnames.add(host)
+  return results, hostnames

http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/06656cc1/src/main/python/apache/aurora/admin/host_maintenance.py
----------------------------------------------------------------------
diff --git a/src/main/python/apache/aurora/admin/host_maintenance.py b/src/main/python/apache/aurora/admin/host_maintenance.py
index 3c04773..b5b1375 100644
--- a/src/main/python/apache/aurora/admin/host_maintenance.py
+++ b/src/main/python/apache/aurora/admin/host_maintenance.py
@@ -117,12 +117,11 @@ class HostMaintenance(object):
     :type percentage: float
     :param duration: SLA uptime duration override
     :type duration: twitter.common.quantity.Amount
-    :rtype: True if all hosts pass SLA check, False otherwise.
+    :rtype: set of unsafe hosts
     """
     sla_percentage = percentage or self.SLA_UPTIME_PERCENTAGE_LIMIT
     sla_duration = duration or self.SLA_UPTIME_DURATION_LIMIT
 
-    log.info('Beginning SLA check for %s' % hostnames)
     vector = self._client.sla_get_safe_domain_vector(self.SLA_MIN_JOB_INSTANCE_COUNT, hostnames)
     host_groups = vector.probe_hosts(
       sla_percentage,
@@ -133,17 +132,15 @@ class HostMaintenance(object):
     # should be considered a batch failure.
     if host_groups:
       if len(host_groups) > 1:
-        log.error('Illegal multiple groups detected in SLA results. Skipping hosts:%s' % hostnames)
+        log.error('Illegal multiple groups detected in SLA results. Skipping hosts: %s' % hostnames)
         return False
 
-      results = format_sla_results(host_groups, unsafe_only=True)
+      results, unsafe_hostnames = format_sla_results(host_groups, unsafe_only=True)
       if results:
         print_results(results)
-        log.warning('Some hosts in a group did not pass SLA check. Skipping group:%s' % hostnames)
-        return False
+        return unsafe_hostnames
 
-    log.info('All hosts passed SLA check.')
-    return True
+    return unsafe_hostnames
 
   def end_maintenance(self, hostnames):
     """Pull a list of hostnames out of maintenance mode.
@@ -165,7 +162,7 @@ class HostMaintenance(object):
     check_and_log_response(self._client.start_maintenance(Hosts(set(hostnames))))
 
   def perform_maintenance(self, hostnames, grouping_function=DEFAULT_GROUPING,
-                          callback=None, percentage=None, duration=None):
+                          callback=None, percentage=None, duration=None, output_file=None):
     """Wrap a callback in between sending hosts into maintenance mode and back.
 
     Walk through the process of putting hosts into maintenance, draining them of tasks,
@@ -174,25 +171,58 @@ class HostMaintenance(object):
 
     :param hostnames: A list of hosts to operate upon
     :type hostnames: list of strings
-    :param groups_per_batch: Number of groups (by default, hosts) to operate on at once
-    :type groups_per_batch: int
     :param grouping_function: How to split up the hostname into groups
     :type grouping_function: function
     :param callback: Function to call once hosts are drained
     :type callback: function
+    :param percentage: SLA percentage to use
+    :type percentage: float
+    :param duration: SLA duration to use
+    :type duration: twitter.common.quantity.Time
+    :param output_file: file to write hosts that were not drained due to failed SLA check
+    :type output_file: string
     """
     self.start_maintenance(hostnames)
+    not_drained_hostnames = set()
 
     for hosts in self.iter_batches(hostnames, grouping_function):
-      if not self._check_sla(list(hosts.hostNames), grouping_function, percentage, duration):
-        self._complete_maintenance(hosts)
-        continue
+      log.info('Beginning SLA check for %s' % hosts.hostNames)
+      unsafe_hostnames = self._check_sla(
+          list(hosts.hostNames),
+          grouping_function,
+          percentage,
+          duration)
+
+      if unsafe_hostnames:
+        log.warning('Some hosts did not pass SLA check and will not be drained! '
+                    'Skipping hosts: %s' % unsafe_hostnames)
+        self._complete_maintenance(Hosts(unsafe_hostnames))
+        not_drained_hostnames |= unsafe_hostnames
+        drainable_hostnames = hosts.hostNames - unsafe_hostnames
+        if not drainable_hostnames:
+          continue
+        hosts = Hosts(drainable_hostnames)
+      else:
+        log.info('All hosts passed SLA check.')
 
       self._drain_hosts(hosts)
       if callback:
         self._operate_on_hosts(hosts, callback)
       self._complete_maintenance(hosts)
 
+    if not_drained_hostnames:
+      output = '\n'.join(list(not_drained_hostnames))
+      log.info('The following hosts did not pass SLA check and were not drained:')
+      print(output)
+      if output_file:
+        try:
+          with open(output_file, 'w') as fp:
+            fp.write(output)
+            fp.write('\n')
+          log.info('Written unsafe host names into: %s' % output_file)
+        except IOError as e:
+          log.error('Failed to write into the output file: %s' % e)
+
   def check_status(self, hostnames):
     """Query the scheduler to determine the maintenance status for a list of hostnames
 

http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/06656cc1/src/main/python/apache/aurora/client/commands/admin.py
----------------------------------------------------------------------
diff --git a/src/main/python/apache/aurora/client/commands/admin.py b/src/main/python/apache/aurora/client/commands/admin.py
index 022e3d6..bc9a9ee 100644
--- a/src/main/python/apache/aurora/client/commands/admin.py
+++ b/src/main/python/apache/aurora/client/commands/admin.py
@@ -486,7 +486,8 @@ def sla_probe_hosts(cluster, percentage, duration):
       options.verbosity).sla_get_safe_domain_vector(options.min_instance_count, hosts)
   groups = vector.probe_hosts(sla_percentage, sla_duration.as_(Time.SECONDS), options.grouping)
 
-  print_results(format_sla_results(groups))
+  output, _ = format_sla_results(groups)
+  print_results(output)
 
 
 @app.command

http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/06656cc1/src/main/python/apache/aurora/client/commands/maintenance.py
----------------------------------------------------------------------
diff --git a/src/main/python/apache/aurora/client/commands/maintenance.py b/src/main/python/apache/aurora/client/commands/maintenance.py
index e2ac59a..a54f0f6 100644
--- a/src/main/python/apache/aurora/client/commands/maintenance.py
+++ b/src/main/python/apache/aurora/client/commands/maintenance.py
@@ -70,7 +70,10 @@ def end_maintenance_hosts(cluster):
          'See sla_probe_hosts and sla_list_safe_domain commands '
          'for more details on SLA.' % HostMaintenance.SLA_UPTIME_DURATION_LIMIT)
 @app.command_option('--override_reason', dest='reason', default=None,
-    help='Reason for overriding default SLA values.')
+    help='Reason for overriding default SLA values. Provide details including the '
+         'maintenance ticket number.')
+@app.command_option('--unsafe_hosts_file', dest='unsafe_hosts_filename', default=None,
+    help='Output file to write host names that did not pass SLA check.')
 @app.command_option(FILENAME_OPTION)
 @app.command_option(HOSTS_OPTION)
 @app.command_option(GROUPING_OPTION)
@@ -82,6 +85,7 @@ def perform_maintenance_hosts(cluster):
                                       [--override_percentage=percentage]
                                       [--override_duration=duration]
                                       [--override_reason=reason]
+                                      [--unsafe_hosts_file=unsafe_hosts_filename]
                                       cluster
 
   Asks the scheduler to remove any running tasks from the machine and remove it
@@ -100,7 +104,16 @@ def perform_maintenance_hosts(cluster):
   percentage = parse_sla_percentage(options.percentage) if options.percentage else None
   duration = parse_time(options.duration) if options.duration else None
   if options.reason:
-    log_admin_message(logging.WARNING, options.reason)
+    log_admin_message(
+        logging.WARNING,
+        'Default SLA values (percentage: %s, duration: %s) are overridden for the following '
+        'hosts: %s. New percentage: %s, duration: %s, override reason: %s' % (
+            HostMaintenance.SLA_UPTIME_PERCENTAGE_LIMIT,
+            HostMaintenance.SLA_UPTIME_DURATION_LIMIT,
+            drainable_hosts,
+            percentage,
+            duration,
+            options.reason))
 
   drained_callback = parse_script(options.post_drain_script)
 
@@ -109,7 +122,8 @@ def perform_maintenance_hosts(cluster):
       grouping_function=options.grouping,
       callback=drained_callback,
       percentage=percentage,
-      duration=duration)
+      duration=duration,
+      output_file=options.unsafe_hosts_filename)
 
 
 @app.command

http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/06656cc1/src/test/python/apache/aurora/admin/test_host_maintenance.py
----------------------------------------------------------------------
diff --git a/src/test/python/apache/aurora/admin/test_host_maintenance.py b/src/test/python/apache/aurora/admin/test_host_maintenance.py
index 8abce0e..34578ae 100644
--- a/src/test/python/apache/aurora/admin/test_host_maintenance.py
+++ b/src/test/python/apache/aurora/admin/test_host_maintenance.py
@@ -15,9 +15,11 @@
 import copy
 import time
 import unittest
+from contextlib import contextmanager
 
 import mock
 from twitter.common import log
+from twitter.common.contextutil import temporary_file
 from twitter.common.quantity import Time
 
 from apache.aurora.admin.host_maintenance import HostMaintenance
@@ -149,7 +151,7 @@ class TestHostMaintenance(unittest.TestCase):
   def test_perform_maintenance(self, mock_check_sla, mock_start_maintenance,
       mock_drain_hosts, mock_operate_on_hosts, mock_complete_maintenance):
     mock_callback = mock.Mock()
-    mock_check_sla.return_value = True
+    mock_check_sla.return_value = set()
     maintenance = HostMaintenance(DEFAULT_CLUSTER, 'quiet')
     maintenance.perform_maintenance(TEST_HOSTNAMES, callback=mock_callback)
     mock_start_maintenance.assert_called_once_with(TEST_HOSTNAMES)
@@ -164,6 +166,47 @@ class TestHostMaintenance(unittest.TestCase):
     assert mock_complete_maintenance.call_args_list == [
         mock.call(Hosts(set([hostname]))) for hostname in TEST_HOSTNAMES]
 
+  @mock.patch("apache.aurora.admin.host_maintenance.HostMaintenance._complete_maintenance",
+              spec=HostMaintenance._complete_maintenance)
+  @mock.patch("apache.aurora.admin.host_maintenance.HostMaintenance._operate_on_hosts",
+              spec=HostMaintenance._operate_on_hosts)
+  @mock.patch("apache.aurora.admin.host_maintenance.HostMaintenance._drain_hosts",
+              spec=HostMaintenance._drain_hosts)
+  @mock.patch("apache.aurora.admin.host_maintenance.HostMaintenance.start_maintenance",
+              spec=HostMaintenance.start_maintenance)
+  @mock.patch("apache.aurora.admin.host_maintenance.HostMaintenance._check_sla",
+              spec=HostMaintenance._check_sla)
+  def test_perform_maintenance_partial_sla_failure(self, mock_check_sla, mock_start_maintenance,
+                               mock_drain_hosts, mock_operate_on_hosts, mock_complete_maintenance):
+    mock_callback = mock.Mock()
+    failed_host = 'us-west-001.example.com'
+    mock_check_sla.return_value = set([failed_host])
+    drained_hosts = set(TEST_HOSTNAMES) - set([failed_host])
+    maintenance = HostMaintenance(DEFAULT_CLUSTER, 'quiet')
+
+    with temporary_file() as fp:
+      with group_by_rack():
+        maintenance.perform_maintenance(
+            TEST_HOSTNAMES,
+            callback=mock_callback,
+            grouping_function='by_rack',
+            output_file=fp.name)
+
+        with open(fp.name, 'r') as fpr:
+          content = fpr.read()
+          assert failed_host in content
+
+        mock_start_maintenance.assert_called_once_with(TEST_HOSTNAMES)
+        assert mock_check_sla.call_count == 1
+        assert mock_drain_hosts.call_count == 1
+        assert mock_drain_hosts.call_args_list == [mock.call(Hosts(drained_hosts))]
+        assert mock_operate_on_hosts.call_count == 1
+        assert mock_operate_on_hosts.call_args_list == [
+            mock.call(Hosts(drained_hosts), mock_callback)]
+        assert mock_complete_maintenance.call_count == 2
+        assert mock_complete_maintenance.call_args_list == [
+            mock.call(Hosts(set([failed_host]))), mock.call(Hosts(drained_hosts))]
+
   @mock.patch("apache.aurora.client.api.AuroraClientAPI.maintenance_status",
       spec=AuroraClientAPI.maintenance_status)
   def test_check_status(self, mock_maintenance_status):
@@ -197,13 +240,18 @@ def test_default_grouping():
   assert batches[2] == Hosts(set(['xyz321.example.com']))
 
 
+@contextmanager
+def group_by_rack():
+  add_grouping('by_rack', rack_grouping)
+  yield
+  remove_grouping('by_rack')
+
+
 def rack_grouping(hostname):
   return hostname.split('-')[1]
 
 
 def test_rack_grouping():
-  add_grouping('by_rack', rack_grouping)
-
   example_host_list = [
     'west-aaa-001.example.com',
     'west-aaa-002.example.com',
@@ -212,7 +260,7 @@ def test_rack_grouping():
     'east-xyz-004.example.com',
   ]
 
-  try:
+  with group_by_rack():
     batches = list(HostMaintenance.iter_batches(example_host_list, 'by_rack'))
     assert batches[0] == Hosts(set([
         'west-aaa-001.example.com',
@@ -223,6 +271,3 @@ def test_rack_grouping():
         'east-xyz-003.example.com',
         'east-xyz-004.example.com',
     ]))
-
-  finally:
-    remove_grouping('by_rack')

http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/06656cc1/src/test/python/apache/aurora/client/commands/test_admin_sla.py
----------------------------------------------------------------------
diff --git a/src/test/python/apache/aurora/client/commands/test_admin_sla.py b/src/test/python/apache/aurora/client/commands/test_admin_sla.py
index be380df..994b123 100644
--- a/src/test/python/apache/aurora/client/commands/test_admin_sla.py
+++ b/src/test/python/apache/aurora/client/commands/test_admin_sla.py
@@ -324,7 +324,7 @@ class TestAdminSlaProbeHostsCommand(AuroraClientCommandTest):
     """Tests successful execution of the sla_probe_hosts command with host list."""
     hosts = ['h0', 'h1']
     mock_options = self.setup_mock_options(hosts=','.join(hosts))
-    mock_vector = self.create_mock_probe_hosts_vector(self.create_probe_hosts(2, 80, True, 0))
+    mock_vector = self.create_mock_probe_hosts_vector([self.create_probe_hosts(2, 80, True, 0)])
     with contextlib.nested(
         patch('apache.aurora.client.commands.admin.AuroraClientAPI',
             new=Mock(spec=AuroraClientAPI)),
@@ -350,7 +350,7 @@ class TestAdminSlaProbeHostsCommand(AuroraClientCommandTest):
 
   def test_probe_hosts_with_file(self):
     """Tests successful execution of the sla_probe_hosts command with host filename."""
-    mock_vector = self.create_mock_probe_hosts_vector(self.create_probe_hosts(1, 80, False, None))
+    mock_vector = self.create_mock_probe_hosts_vector([self.create_probe_hosts(1, 80, False, None)])
     with temporary_file() as fp:
       fp.write('h0')
       fp.flush()

http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/06656cc1/src/test/python/apache/aurora/client/commands/test_maintenance.py
----------------------------------------------------------------------
diff --git a/src/test/python/apache/aurora/client/commands/test_maintenance.py b/src/test/python/apache/aurora/client/commands/test_maintenance.py
index 642c235..c4677fc 100644
--- a/src/test/python/apache/aurora/client/commands/test_maintenance.py
+++ b/src/test/python/apache/aurora/client/commands/test_maintenance.py
@@ -15,6 +15,7 @@
 import contextlib
 
 from mock import Mock, patch
+from twitter.common.contextutil import temporary_file
 
 from apache.aurora.client.commands.maintenance import (
     end_maintenance_hosts,
@@ -125,7 +126,11 @@ class TestMaintenanceCommands(AuroraClientCommandTest):
     mock_scheduler_proxy.maintenanceStatus.side_effect = host_status_results
     mock_scheduler_proxy.startMaintenance.return_value = self.create_start_maintenance_result()
     mock_scheduler_proxy.drainHosts.return_value = self.create_start_maintenance_result()
-    mock_vector = self.create_mock_probe_hosts_vector(self.create_probe_hosts(1, 95, True, None))
+    mock_vector = self.create_mock_probe_hosts_vector([
+        self.create_probe_hosts(self.HOSTNAMES[0], 95, True, None),
+        self.create_probe_hosts(self.HOSTNAMES[1], 95, True, None),
+        self.create_probe_hosts(self.HOSTNAMES[2], 95, True, None)
+    ])
 
     with contextlib.nested(
         patch('time.sleep'),
@@ -149,65 +154,80 @@ class TestMaintenanceCommands(AuroraClientCommandTest):
       assert mock_scheduler_proxy.endMaintenance.call_count == 3
 
   def test_perform_maintenance_hosts_failed_default_sla(self):
-    mock_options = self.make_mock_options()
-    mock_options.post_drain_script = None
-    mock_options.grouping = 'by_host'
-
-    def host_status_results(hostnames):
-      if isinstance(hostnames, Hosts):
-        return self.create_drained_status_result(hostnames)
-      return self.create_maintenance_status_result()
-
-    mock_api, mock_scheduler_proxy = self.create_mock_api()
-    mock_scheduler_proxy.endMaintenance.return_value = self.create_end_maintenance_result()
-    mock_scheduler_proxy.maintenanceStatus.side_effect = host_status_results
-    mock_scheduler_proxy.startMaintenance.return_value = self.create_start_maintenance_result()
-    mock_scheduler_proxy.drainHosts.return_value = self.create_start_maintenance_result()
-    mock_vector = self.create_mock_probe_hosts_vector(self.create_probe_hosts(1, 95, False, None))
-
-    with contextlib.nested(
-        patch('time.sleep'),
-        patch('apache.aurora.client.api.SchedulerProxy', return_value=mock_scheduler_proxy),
-        patch('apache.aurora.client.api.sla.Sla.get_domain_uptime_vector',
-              return_value=mock_vector),
-        patch('apache.aurora.client.commands.maintenance.CLUSTERS', new=self.TEST_CLUSTERS),
-        patch('twitter.common.app.get_options', return_value=mock_options)):
-      perform_maintenance_hosts([self.TEST_CLUSTER])
+    with temporary_file() as fp:
+      mock_options = self.make_mock_options()
+      mock_options.post_drain_script = None
+      mock_options.grouping = 'by_host'
+      mock_options.unsafe_hosts_filename = fp.name
+
+      def host_status_results(hostnames):
+        if isinstance(hostnames, Hosts):
+          return self.create_drained_status_result(hostnames)
+        return self.create_maintenance_status_result()
+
+      mock_api, mock_scheduler_proxy = self.create_mock_api()
+      mock_scheduler_proxy.endMaintenance.return_value = self.create_end_maintenance_result()
+      mock_scheduler_proxy.maintenanceStatus.side_effect = host_status_results
+      mock_scheduler_proxy.startMaintenance.return_value = self.create_start_maintenance_result()
+      mock_scheduler_proxy.drainHosts.return_value = self.create_start_maintenance_result()
+      mock_vector = self.create_mock_probe_hosts_vector([
+          self.create_probe_hosts(self.HOSTNAMES[0], 95, False, None),
+          self.create_probe_hosts(self.HOSTNAMES[1], 95, False, None),
+          self.create_probe_hosts(self.HOSTNAMES[2], 95, False, None)
+      ])
+
+      with contextlib.nested(
+          patch('time.sleep'),
+          patch('apache.aurora.client.api.SchedulerProxy', return_value=mock_scheduler_proxy),
+          patch('apache.aurora.client.api.sla.Sla.get_domain_uptime_vector',
+                return_value=mock_vector),
+          patch('apache.aurora.client.commands.maintenance.CLUSTERS', new=self.TEST_CLUSTERS),
+          patch('twitter.common.app.get_options', return_value=mock_options)):
+        perform_maintenance_hosts([self.TEST_CLUSTER])
 
-      mock_scheduler_proxy.startMaintenance.assert_called_with(Hosts(set(self.HOSTNAMES)))
-      assert mock_scheduler_proxy.endMaintenance.call_count == len(self.HOSTNAMES)
+        mock_scheduler_proxy.startMaintenance.assert_called_with(Hosts(set(self.HOSTNAMES)))
+        assert mock_scheduler_proxy.endMaintenance.call_count == len(self.HOSTNAMES)
 
   def test_perform_maintenance_hosts_failed_custom_sla(self):
-    mock_options = self.make_mock_options()
-    mock_options.post_drain_script = None
-    mock_options.grouping = 'by_host'
-    mock_options.percentage = 50
-    mock_options.duration = '10m'
-    mock_options.reason = 'Test overrides'
-
-    def host_status_results(hostnames):
-      if isinstance(hostnames, Hosts):
-        return self.create_drained_status_result(hostnames)
-      return self.create_maintenance_status_result()
-
-    mock_api, mock_scheduler_proxy = self.create_mock_api()
-    mock_scheduler_proxy.endMaintenance.return_value = self.create_end_maintenance_result()
-    mock_scheduler_proxy.maintenanceStatus.side_effect = host_status_results
-    mock_scheduler_proxy.startMaintenance.return_value = self.create_start_maintenance_result()
-    mock_scheduler_proxy.drainHosts.return_value = self.create_start_maintenance_result()
-    mock_vector = self.create_mock_probe_hosts_vector(self.create_probe_hosts(1, 95, False, None))
-
-    with contextlib.nested(
-        patch('time.sleep'),
-        patch('apache.aurora.client.api.SchedulerProxy', return_value=mock_scheduler_proxy),
-        patch('apache.aurora.client.api.sla.Sla.get_domain_uptime_vector',
-              return_value=mock_vector),
-        patch('apache.aurora.client.commands.maintenance.CLUSTERS', new=self.TEST_CLUSTERS),
-        patch('twitter.common.app.get_options', return_value=mock_options)):
-      perform_maintenance_hosts([self.TEST_CLUSTER])
+    with temporary_file() as fp:
+      mock_options = self.make_mock_options()
+      mock_options.post_drain_script = None
+      mock_options.grouping = 'by_host'
+      mock_options.percentage = 50
+      mock_options.duration = '10m'
+      mock_options.reason = 'Test overrides'
+      mock_options.unsafe_hosts_filename = fp.name
+
+      def host_status_results(hostnames):
+        if isinstance(hostnames, Hosts):
+          return self.create_drained_status_result(hostnames)
+        return self.create_maintenance_status_result()
+
+      mock_api, mock_scheduler_proxy = self.create_mock_api()
+      mock_scheduler_proxy.endMaintenance.return_value = self.create_end_maintenance_result()
+      mock_scheduler_proxy.maintenanceStatus.side_effect = host_status_results
+      mock_scheduler_proxy.startMaintenance.return_value = self.create_start_maintenance_result()
+      mock_scheduler_proxy.drainHosts.return_value = self.create_start_maintenance_result()
+      mock_vector = self.create_mock_probe_hosts_vector([
+          self.create_probe_hosts(self.HOSTNAMES[0], 95, False, None),
+          self.create_probe_hosts(self.HOSTNAMES[1], 95, False, None),
+          self.create_probe_hosts(self.HOSTNAMES[2], 95, False, None)
+      ])
+
+      with contextlib.nested(
+          patch('time.sleep'),
+          patch('apache.aurora.client.api.SchedulerProxy', return_value=mock_scheduler_proxy),
+          patch('apache.aurora.client.api.sla.Sla.get_domain_uptime_vector',
+                return_value=mock_vector),
+          patch('apache.aurora.client.commands.maintenance.CLUSTERS', new=self.TEST_CLUSTERS),
+          patch('apache.aurora.client.commands.maintenance.log_admin_message'),
+          patch('twitter.common.app.get_options', return_value=mock_options)) as (
+              _, _, _, _, log, _):
+        perform_maintenance_hosts([self.TEST_CLUSTER])
 
-      mock_scheduler_proxy.startMaintenance.assert_called_with(Hosts(set(self.HOSTNAMES)))
-      assert mock_scheduler_proxy.endMaintenance.call_count == len(self.HOSTNAMES)
+        assert 'Test overrides' in log.call_args[0][1]
+        mock_scheduler_proxy.startMaintenance.assert_called_with(Hosts(set(self.HOSTNAMES)))
+        assert mock_scheduler_proxy.endMaintenance.call_count == len(self.HOSTNAMES)
 
   def test_perform_maintenance_hosts_reason_missing(self):
     mock_options = self.make_mock_options()

http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/06656cc1/src/test/python/apache/aurora/client/commands/util.py
----------------------------------------------------------------------
diff --git a/src/test/python/apache/aurora/client/commands/util.py b/src/test/python/apache/aurora/client/commands/util.py
index b1822f2..e24f130 100644
--- a/src/test/python/apache/aurora/client/commands/util.py
+++ b/src/test/python/apache/aurora/client/commands/util.py
@@ -131,16 +131,14 @@ jobs = [HELLO_WORLD]
         bad_clause)
 
   @classmethod
-  def create_mock_probe_hosts_vector(cls, result):
+  def create_mock_probe_hosts_vector(cls, side_effects):
     mock_vector = Mock(spec=DomainUpTimeSlaVector)
-    mock_vector.probe_hosts.return_value = result
+    mock_vector.probe_hosts.side_effect = side_effects
     return mock_vector
 
   @classmethod
-  def create_probe_hosts(cls, num_hosts, predicted, safe, safe_in):
+  def create_probe_hosts(cls, hostname, predicted, safe, safe_in):
     hosts = defaultdict(list)
-    for i in range(num_hosts):
-      host_name = 'h%s' % i
-      job = AuroraJobKey.from_path('west/role/env/job%s' % i)
-      hosts[host_name].append(JobUpTimeDetails(job, predicted, safe, safe_in))
+    job = AuroraJobKey.from_path('west/role/env/job-%s' % hostname)
+    hosts[hostname].append(JobUpTimeDetails(job, predicted, safe, safe_in))
     return [hosts]


Mime
View raw message