aurora-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ma...@apache.org
Subject git commit: Adding admin_client sla_list_safe_domain command.
Date Fri, 21 Feb 2014 23:14:19 GMT
Repository: incubator-aurora
Updated Branches:
  refs/heads/master e6cd6a9ea -> 95459d809


Adding admin_client sla_list_safe_domain command.

Bugs closed: AURORA-208

Reviewed at https://reviews.apache.org/r/18296/


Project: http://git-wip-us.apache.org/repos/asf/incubator-aurora/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-aurora/commit/95459d80
Tree: http://git-wip-us.apache.org/repos/asf/incubator-aurora/tree/95459d80
Diff: http://git-wip-us.apache.org/repos/asf/incubator-aurora/diff/95459d80

Branch: refs/heads/master
Commit: 95459d809ce9af5213f4e12153c8ec47df10eb0c
Parents: e6cd6a9
Author: Maxim Khutornenko <maxim@apache.org>
Authored: Fri Feb 21 15:13:45 2014 -0800
Committer: Maxim Khutornenko <maxim@apache.org>
Committed: Fri Feb 21 15:13:45 2014 -0800

----------------------------------------------------------------------
 .../python/apache/aurora/client/api/__init__.py |   3 +
 src/main/python/apache/aurora/client/api/sla.py | 102 ++++++++-
 .../apache/aurora/client/commands/admin.py      | 109 +++++++++-
 .../python/apache/aurora/client/api/test_sla.py |  71 ++++++-
 .../python/apache/aurora/client/commands/BUILD  |  15 +-
 .../aurora/client/commands/test_admin_sla.py    | 206 +++++++++++++++++++
 .../apache/aurora/client/commands/util.py       |   1 -
 7 files changed, 490 insertions(+), 17 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/95459d80/src/main/python/apache/aurora/client/api/__init__.py
----------------------------------------------------------------------
diff --git a/src/main/python/apache/aurora/client/api/__init__.py b/src/main/python/apache/aurora/client/api/__init__.py
index 0999b6f..37b7f42 100644
--- a/src/main/python/apache/aurora/client/api/__init__.py
+++ b/src/main/python/apache/aurora/client/api/__init__.py
@@ -199,6 +199,9 @@ class AuroraClientAPI(object):
     self._assert_valid_job_key(job_key)
     return Sla(self._scheduler_proxy).get_job_uptime_vector(job_key)
 
+  def sla_get_safe_domain_vector(self):
+    return Sla(self._scheduler_proxy).get_domain_uptime_vector(self._cluster)
+
   def _assert_valid_job_key(self, job_key):
     if not isinstance(job_key, AuroraJobKey):
       raise self.TypeError('Invalid job_key %r: expected %s but got %s'

http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/95459d80/src/main/python/apache/aurora/client/api/sla.py
----------------------------------------------------------------------
diff --git a/src/main/python/apache/aurora/client/api/sla.py b/src/main/python/apache/aurora/client/api/sla.py
index b0b2b24..fd5edd1 100644
--- a/src/main/python/apache/aurora/client/api/sla.py
+++ b/src/main/python/apache/aurora/client/api/sla.py
@@ -17,12 +17,15 @@
 import math
 import time
 
+from collections import defaultdict, namedtuple
+from copy import deepcopy
+
 from apache.aurora.client.base import check_and_log_response
+from apache.aurora.common.aurora_job_key import AuroraJobKey
 
 from gen.apache.aurora.constants import ACTIVE_STATES
 from gen.apache.aurora.ttypes import (
   Identity,
-  JobKey,
   Quota,
   Response,
   ResponseCode,
@@ -32,24 +35,30 @@ from gen.apache.aurora.ttypes import (
 
 
 class JobUpTimeSlaVector(object):
-  """Converts job tasks into SLA vector data: a list of instance uptimes.
+  """A grouping of job active tasks by:
+      - instance: Map of instance ID -> instance uptime in seconds.
      Exposes an API for converting raw instance uptime data into job SLA metrics.
   """
 
-  def __init__(self, tasks):
+  def __init__(self, tasks, now=None):
     self._tasks = tasks
-    self._now = time.time()
+    self._now = now or time.time()
     self._uptime_map = self._instance_uptime()
 
-  def get_task_up_count(self, duration):
+  def total_tasks(self):
+    """Returns the total count of active tasks."""
+    return len(self._uptime_map)
+
+  def get_task_up_count(self, duration, total_tasks=None):
     """Returns the percentage of job tasks that stayed up longer than duration.
 
     Arguments:
     duration -- uptime duration in seconds.
+    total_tasks -- optional total task count to calculate against.
     """
-    total = len(self._uptime_map)
+    total = total_tasks or len(self._uptime_map)
     above = len([uptime for uptime in self._uptime_map.values() if uptime >= duration])
-    return 0 if not total else 100.0 * above / total
+    return 100.0 * above / total if total else 0
 
   def get_job_uptime(self, percentile):
     """Returns the uptime (in seconds) of the job at the specified percentile.
@@ -75,6 +84,81 @@ class JobUpTimeSlaVector(object):
     return instance_map
 
 
+class DomainUpTimeSlaVector(object):
+  """A grouping of all active tasks in the cluster by:
+      - job: Map of job_key -> task. Provides logical mapping between jobs and their active tasks.
+      - host: Map of hostname -> job_key. Provides logical mapping between hosts and their jobs.
+     Exposes an API for querying safe domain details.
+  """
+
+  JobUpTimeLimit = namedtuple('JobUpTimeLimit', ['job', 'percentage', 'duration_seconds'])
+
+  def __init__(self, cluster, tasks):
+    self._cluster = cluster
+    self._tasks = tasks
+    self._now = time.time()
+    self._jobs, self._hosts = self._init_mappings()
+
+  def get_safe_hosts(self, percentage, duration, job_limits=None):
+    """Returns hosts safe to restart with respect to their job SLA.
+       Every host is analyzed separately without considering other job hosts.
+
+       Arguments:
+       percentage -- default task up count percentage. Used if job_limits mapping is not found.
+       duration -- default task uptime duration in seconds. Used if job_limits mapping is not found.
+       job_limits -- optional SLA override map. Key: job key. Value JobUpTimeLimit. If specified,
+                     replaces default percentage/duration within the job context.
+    """
+    safe_hosts = defaultdict(list)
+    for host, job_keys in self._hosts.items():
+      safe_limits = []
+      for job_key in job_keys:
+        # Get total job task count to use in SLA calculation.
+        total_count = JobUpTimeSlaVector(self._jobs[job_key]).total_tasks()
+
+        # Get a list of job tasks that would remain after the affected host goes down
+        # and create an SLA vector with these tasks.
+        filtered_tasks = [task for task in self._jobs[job_key]
+                          if task.assignedTask.slaveHost != host]
+        filtered_vector = JobUpTimeSlaVector(filtered_tasks, self._now)
+
+        job_duration = duration
+        job_percentage = percentage
+        if job_limits and job_key in job_limits:
+          job_duration = job_limits[job_key].duration_seconds
+          job_percentage = job_limits[job_key].percentage
+
+        # Calculate the SLA that would be in effect should the host go down.
+        filtered_percentage = filtered_vector.get_task_up_count(job_duration, total_count)
+        safe_limits.append(self.JobUpTimeLimit(job_key, filtered_percentage, job_duration))
+
+        if filtered_percentage < job_percentage:
+          break
+
+      else:
+        safe_hosts[host] = safe_limits
+
+    return safe_hosts
+
+  def _init_mappings(self):
+    def job_key_from_scheduled(task):
+      return AuroraJobKey(
+          cluster=self._cluster,
+          role=task.assignedTask.task.owner.role,
+          env=task.assignedTask.task.environment,
+          name=task.assignedTask.task.jobName
+      )
+
+    jobs = defaultdict(list)
+    hosts = defaultdict(list)
+    for task in self._tasks:
+      job_key = job_key_from_scheduled(task)
+      jobs[job_key].append(task)
+      hosts[task.assignedTask.slaveHost].append(job_key)
+
+    return jobs, hosts
+
+
 class Sla(object):
   """Defines methods for generating job uptime metrics required for monitoring job SLA."""
 
@@ -89,6 +173,10 @@ class Sla(object):
     """
     return JobUpTimeSlaVector(self._get_tasks(self._create_task_query(job_key=job_key)))
 
+  def get_domain_uptime_vector(self, cluster):
+    """Returns a DomainUpTimeSlaVector object with all available job uptimes."""
+    return DomainUpTimeSlaVector(cluster, self._get_tasks(self._create_task_query()))
+
   def _get_tasks(self, task_query):
     resp = self._scheduler.getTasksStatus(task_query)
     check_and_log_response(resp)

http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/95459d80/src/main/python/apache/aurora/client/commands/admin.py
----------------------------------------------------------------------
diff --git a/src/main/python/apache/aurora/client/commands/admin.py b/src/main/python/apache/aurora/client/commands/admin.py
index 45686ae..989c5b6 100644
--- a/src/main/python/apache/aurora/client/commands/admin.py
+++ b/src/main/python/apache/aurora/client/commands/admin.py
@@ -25,7 +25,9 @@ import subprocess
 
 from apache.aurora.admin.mesos_maintenance import MesosMaintenance
 from apache.aurora.client.api import AuroraClientAPI
+from apache.aurora.client.api.sla import DomainUpTimeSlaVector
 from apache.aurora.client.base import check_and_log_response, die, requires
+from apache.aurora.common.aurora_job_key import AuroraJobKey
 from apache.aurora.common.clusters import CLUSTERS
 
 from gen.apache.aurora.constants import ACTIVE_STATES, TERMINAL_STATES
@@ -36,8 +38,8 @@ from gen.apache.aurora.ttypes import (
 )
 
 from twitter.common import app, log
-from twitter.common.quantity import Amount, Data
-from twitter.common.quantity.parse_simple import parse_data
+from twitter.common.quantity import Amount, Data, Time
+from twitter.common.quantity.parse_simple import parse_data, parse_time
 
 
 GROUPING_OPTION = optparse.Option(
@@ -51,18 +53,28 @@ GROUPING_OPTION = optparse.Option(
         ', '.join(MesosMaintenance.GROUPING_FUNCTIONS.keys())))
 
 
+def parse_host_file(filename):
+  with open(filename, 'r') as hosts:
+    hosts = [hostname.strip() for hostname in hosts]
+  if not hosts:
+    die('No valid hosts found in %s.' % filename)
+  return hosts
+
 def parse_hosts(options):
   if bool(options.filename) == bool(options.hosts):
     die('Please specify either --filename or --hosts')
   if options.filename:
-    with open(options.filename, 'r') as hosts:
-      hosts = [hostname.strip() for hostname in hosts]
+    hosts = parse_host_file(options.filename)
   elif options.hosts:
     hosts = [hostname.strip() for hostname in options.hosts.split(",")]
   if not hosts:
     die('No valid hosts found.')
   return hosts
 
+def print_results(results):
+  for line in results:
+    print(line)
+
 
 @app.command
 @app.command_option('--force', dest='force', default=False, action='store_true',
@@ -409,3 +421,92 @@ def scheduler_snapshot(cluster):
   """
   options = app.get_options()
   check_and_log_response(AuroraClientAPI(CLUSTERS['cluster'], options.verbosity).snapshot())
+
+
+@app.command
+@app.command_option('-i', '--include_hosts', dest='include_filename', default=None,
+    help='Inclusion filter. An optional text file listing hosts (one per line)'
+         'to include into the result set if found. Example: cl1-aau-dev2.test.com')
+@app.command_option('-x', '--exclude_hosts', dest='exclude_filename', default=None,
+    help='Exclusion filter. An optional text file listing hosts (one per line)'
+         'to exclude from the result set if found. Example: cl1-aau-dev1.test.com')
+@app.command_option('-l', '--list_jobs', dest='list_jobs', default=False, action='store_true',
+    help='Lists all affected job keys with projected new SLAs if their tasks get killed'
+         'in the following column format:\n'
+         'HOST  JOB  PREDICTED_SLA  DURATION_SECONDS')
+@app.command_option('-o', '--override_jobs', dest='override_filename', default=None,
+    help='An optional text file to load job specific SLAs that will override'
+         'cluster-wide command line percentage and duration values.'
+         'The file can have multiple lines in the following format:'
+         '"cluster/role/env/job percentage duration". Example: cl/mesos/prod/labrat 95 2h')
+@requires.exactly('cluster', 'percentage', 'duration')
+def sla_list_safe_domain(cluster, percentage, duration):
+  """usage: sla_list_safe_domain
+            [--exclude_hosts=filename]
+            [--include_hosts=filename]
+            [--list_jobs]
+            [--override_jobs=filename]
+            cluster percentage duration
+
+  Returns a list of relevant hosts where it would be safe to kill
+  tasks without violating their job SLA. The SLA is defined as a pair of
+  percentage and duration, where:
+
+  percentage - Percentage of tasks required to be up within the duration.
+  Applied to all jobs except those listed in --override_jobs file;
+
+  duration - Time interval (now - value) for the percentage of up tasks.
+  Applied to all jobs except those listed in --override_jobs file.
+  Format: XdYhZmWs (each field is optional but must be in that order.)
+  Examples: 5m, 1d3h45m.
+  """
+  def parse_percentage(percentage):
+    val = float(percentage)
+    if val <= 0 or val > 100:
+      die('Invalid percentage %s. Must be within (0, 100].' % percentage)
+    return val
+
+  def parse_jobs_file(filename):
+    result = {}
+    with open(filename, 'r') as overrides:
+      for line in overrides:
+        if not line.strip():
+          continue
+
+        tokens = line.split()
+        if len(tokens) != 3:
+          die('Invalid line in %s:%s' % (filename, line))
+        job_key = AuroraJobKey.from_path(tokens[0])
+        result[job_key] = DomainUpTimeSlaVector.JobUpTimeLimit(
+            job=job_key,
+            percentage=parse_percentage(tokens[1]),
+            duration_seconds=parse_time(tokens[2]).as_(Time.SECONDS)
+        )
+    return result
+
+  options = app.get_options()
+
+  sla_percentage = parse_percentage(percentage)
+  sla_duration = parse_time(duration)
+
+  exclude_hosts = parse_host_file(options.exclude_filename) if options.exclude_filename else []
+  include_hosts = parse_host_file(options.include_filename) if options.include_filename else []
+  override_jobs = parse_jobs_file(options.override_filename) if options.override_filename else {}
+
+  vector = AuroraClientAPI(CLUSTERS[cluster], options.verbosity).sla_get_safe_domain_vector()
+  hosts = vector.get_safe_hosts(sla_percentage, sla_duration.as_(Time.SECONDS), override_jobs)
+
+  results = []
+  for host in sorted(hosts.keys()):
+    if include_hosts and host not in include_hosts or exclude_hosts and host in exclude_hosts:
+      continue
+
+    if options.list_jobs:
+      results.append('\n'.join(['%s %s %.2f %d' %
+                               (host, limit.job.to_path(), limit.percentage, limit.duration_seconds)
+                                for limit in hosts[host]]))
+    else:
+      results.append('%s' % host)
+
+  print_results(results)
+

http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/95459d80/src/test/python/apache/aurora/client/api/test_sla.py
----------------------------------------------------------------------
diff --git a/src/test/python/apache/aurora/client/api/test_sla.py b/src/test/python/apache/aurora/client/api/test_sla.py
index 2e6c8fa..2778545 100644
--- a/src/test/python/apache/aurora/client/api/test_sla.py
+++ b/src/test/python/apache/aurora/client/api/test_sla.py
@@ -17,7 +17,7 @@
 import unittest
 import time
 
-from apache.aurora.client.api.sla import Sla, JobUpTimeSlaVector
+from apache.aurora.client.api.sla import DomainUpTimeSlaVector, JobUpTimeSlaVector, Sla
 from apache.aurora.common.aurora_job_key import AuroraJobKey
 
 from gen.apache.aurora.AuroraSchedulerManager import Client as scheduler_client
@@ -44,10 +44,11 @@ class SlaTest(unittest.TestCase):
   def setUp(self):
     self._scheduler = Mock()
     self._sla = Sla(self._scheduler)
+    self._cluster = 'cl'
     self._role = 'mesos'
     self._name = 'job'
     self._env = 'test'
-    self._job_key = AuroraJobKey('foo', self._role, self._env, self._name)
+    self._job_key = AuroraJobKey(self._cluster, self._role, self._env, self._name)
 
   def mock_get_tasks(self, tasks, response_code=None):
     response_code = ResponseCode.OK if response_code is None else response_code
@@ -55,9 +56,16 @@ class SlaTest(unittest.TestCase):
     resp.result = Result(scheduleStatusResult=ScheduleStatusResult(tasks=tasks))
     self._scheduler.getTasksStatus.return_value = resp
 
-  def create_task(self, duration, id):
+  def create_task(self, duration, id, host=None, name=None):
     return ScheduledTask(
-        assignedTask=AssignedTask(instanceId=id, task=TaskConfig(production=True)),
+        assignedTask=AssignedTask(
+            instanceId=id,
+            slaveHost=host,
+            task=TaskConfig(
+                production=True,
+                jobName=name or self._name,
+                owner=Identity(role=self._role),
+                environment=self._env)),
         status=ScheduleStatus.RUNNING,
         taskEvents=[TaskEvent(
             status=ScheduleStatus.STARTING,
@@ -88,6 +96,23 @@ class SlaTest(unittest.TestCase):
       )
       self.expect_task_status_call()
 
+  def assert_safe_domain_result(self, host, percentage, duration, in_limit=None, out_limit=None):
+    vector = self._sla.get_domain_uptime_vector(self._cluster)
+    result = vector.get_safe_hosts(percentage, duration, in_limit)
+    assert 1 == len(result), ('Expected length:%s Actual length:%s' % (1, len(result)))
+    assert host in result, ('Expected host:%s not found in result' % host)
+    if out_limit:
+      assert result[host][0].job.name == out_limit.job.name, (
+          'Expected job:%s Actual:%s' % (out_limit.job.name, result[host][0].job.name)
+      )
+      assert result[host][0].percentage == out_limit.percentage, (
+        'Expected %%:%s Actual %%:%s' % (out_limit.percentage, result[host][0].percentage)
+      )
+      assert result[host][0].duration == out_limit.duration, (
+        'Expected duration:%s Actual duration:%s' % (out_limit.duration, result[host][0].duration)
+      )
+    self._scheduler.getTasksStatus.assert_called_once_with(TaskQuery(statuses=ACTIVE_STATES))
+
   def expect_task_status_call(self):
     self._scheduler.getTasksStatus.assert_called_once_with(
         TaskQuery(
@@ -133,3 +158,41 @@ class SlaTest(unittest.TestCase):
   def test_uptime_100(self):
     self.mock_get_tasks(self.create_tasks([100, 200, 300, 400]))
     self.assert_uptime_result(None, 100)
+
+  def test_domain_uptime_no_tasks(self):
+    self.mock_get_tasks([])
+    vector = self._sla.get_domain_uptime_vector(self._cluster)
+    assert 0 == len(vector.get_safe_hosts(50, 400)), 'Length must be empty.'
+
+  def test_domain_uptime_no_result(self):
+    self.mock_get_tasks([
+        self.create_task(100, 1, 'h1', 'j1'),
+        self.create_task(200, 2, 'h2', 'j1')
+    ])
+    vector = self._sla.get_domain_uptime_vector(self._cluster)
+    assert 0 == len(vector.get_safe_hosts(50, 400)), 'Length must be empty.'
+
+  def test_domain_uptime(self):
+    self.mock_get_tasks([
+      self.create_task(100, 1, 'h1', 'j1'),
+      self.create_task(200, 2, 'h2', 'j1'),
+      self.create_task(100, 1, 'h2', 'j2')
+    ])
+    self.assert_safe_domain_result('h1', 50, 200)
+
+  def test_domain_uptime_with_override(self):
+    self.mock_get_tasks([
+      self.create_task(100, 1, 'h1', self._name),
+      self.create_task(200, 2, 'h2', self._name),
+      self.create_task(100, 1, 'h2', 'j2')
+    ])
+
+    job_override = {
+        self._job_key:
+        DomainUpTimeSlaVector.JobUpTimeLimit(
+            job=self._job_key,
+            percentage=50,
+            duration_seconds=100)
+    }
+    self.assert_safe_domain_result('h1', 50, 400, in_limit=job_override)
+

http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/95459d80/src/test/python/apache/aurora/client/commands/BUILD
----------------------------------------------------------------------
diff --git a/src/test/python/apache/aurora/client/commands/BUILD b/src/test/python/apache/aurora/client/commands/BUILD
index 02c27aa..6d448d7 100644
--- a/src/test/python/apache/aurora/client/commands/BUILD
+++ b/src/test/python/apache/aurora/client/commands/BUILD
@@ -16,7 +16,7 @@
 
 python_test_suite(
   name = 'all',
-  dependencies = [ pants(':core'), pants(':run'), pants(':ssh') ]
+  dependencies = [ pants(':core'), pants(':run'), pants(':ssh'), pants(':admin') ]
 )
 
 python_tests(
@@ -40,6 +40,19 @@ python_tests(
   ])
 
 python_tests(
+  name='admin',
+  sources = [
+    'test_admin_sla.py',
+  ],
+  dependencies = [
+    pants(':util'),
+    pants('3rdparty/python:mock'),
+    pants('3rdparty/python:twitter.common.contextutil'),
+    pants('src/main/python/apache/aurora/client/commands:admin'),
+    pants('src/main/thrift/org/apache/aurora/gen:py-thrift'),
+  ])
+
+python_tests(
   name='ssh',
   sources = [
     'test_ssh.py',

http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/95459d80/src/test/python/apache/aurora/client/commands/test_admin_sla.py
----------------------------------------------------------------------
diff --git a/src/test/python/apache/aurora/client/commands/test_admin_sla.py b/src/test/python/apache/aurora/client/commands/test_admin_sla.py
new file mode 100644
index 0000000..780ad18
--- /dev/null
+++ b/src/test/python/apache/aurora/client/commands/test_admin_sla.py
@@ -0,0 +1,206 @@
+#
+# Copyright 2014 Apache Software Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import contextlib
+
+from collections import defaultdict
+
+from apache.aurora.client.api import AuroraClientAPI
+from apache.aurora.client.api.sla import DomainUpTimeSlaVector
+from apache.aurora.client.commands.admin import sla_list_safe_domain
+from apache.aurora.client.commands.util import AuroraClientCommandTest
+from apache.aurora.common.aurora_job_key import AuroraJobKey
+
+from twitter.common.contextutil import temporary_file
+
+from mock import Mock, patch
+
+
+class TestAdminSlaListSafeDomainCommand(AuroraClientCommandTest):
+
+  @classmethod
+  def setup_mock_options(cls, exclude=None, include=None, override=None, list_jobs=False):
+    mock_options = Mock()
+    mock_options.exclude_filename = exclude
+    mock_options.include_filename = include
+    mock_options.override_filename = override
+    mock_options.list_jobs = list_jobs
+    mock_options.verbosity = False
+    return mock_options
+
+  @classmethod
+  def create_hosts(cls, num_hosts, percentage, duration):
+    hosts = defaultdict(list)
+    for i in range(num_hosts):
+      host_name = 'h%s' % i
+      job = AuroraJobKey.from_path('west/role/env/job%s' % i)
+      hosts[host_name].append(DomainUpTimeSlaVector.JobUpTimeLimit(job, percentage, duration))
+    return hosts
+
+  @classmethod
+  def create_mock_vector(cls, result):
+    mock_vector = Mock(spec=DomainUpTimeSlaVector)
+    mock_vector.get_safe_hosts.return_value = result
+    return mock_vector
+
+  def test_safe_domain_no_options(self):
+    """Tests successful execution of the sla_list_safe_domain command without extra options."""
+    mock_options = self.setup_mock_options()
+    mock_vector = self.create_mock_vector(self.create_hosts(3, 80, 100))
+    with contextlib.nested(
+        patch('apache.aurora.client.commands.admin.AuroraClientAPI', new=Mock(spec=AuroraClientAPI)),
+        patch('apache.aurora.client.commands.admin.print_results'),
+        patch('apache.aurora.client.commands.admin.CLUSTERS', new=self.TEST_CLUSTERS),
+        patch('twitter.common.app.get_options', return_value=mock_options)
+    ) as (
+        mock_api,
+        mock_print_results,
+        test_clusters,
+        mock_options):
+
+      mock_api.return_value.sla_get_safe_domain_vector.return_value = mock_vector
+      sla_list_safe_domain(['west', '50', '100s'])
+
+      mock_vector.get_safe_hosts.assert_called_once_with(50.0, 100.0, {})
+      mock_print_results.assert_called_once_with(['h0', 'h1', 'h2'])
+
+  def test_safe_domain_exclude_hosts(self):
+    """Test successful execution of the sla_list_safe_domain command with exclude hosts option."""
+    mock_vector = self.create_mock_vector(self.create_hosts(3, 80, 100))
+    with temporary_file() as fp:
+      fp.write('h1')
+      fp.flush()
+      mock_options = self.setup_mock_options(exclude=fp.name)
+      with contextlib.nested(
+          patch('apache.aurora.client.commands.admin.AuroraClientAPI', new=Mock(spec=AuroraClientAPI)),
+          patch('apache.aurora.client.commands.admin.print_results'),
+          patch('apache.aurora.client.commands.admin.CLUSTERS', new=self.TEST_CLUSTERS),
+          patch('twitter.common.app.get_options', return_value=mock_options)
+      ) as (
+          mock_api,
+          mock_print_results,
+          test_clusters,
+          mock_options):
+
+        mock_api.return_value.sla_get_safe_domain_vector.return_value = mock_vector
+
+        sla_list_safe_domain(['west', '50', '100s'])
+
+        mock_vector.get_safe_hosts.assert_called_once_with(50.0, 100.0, {})
+        mock_print_results.assert_called_once_with(['h0', 'h2'])
+
+  def test_safe_domain_include_hosts(self):
+    """Test successful execution of the sla_list_safe_domain command with include hosts option."""
+    mock_vector = self.create_mock_vector(self.create_hosts(3, 80, 100))
+    with temporary_file() as fp:
+      fp.write('h1')
+      fp.flush()
+      mock_options = self.setup_mock_options(include=fp.name)
+      with contextlib.nested(
+          patch('apache.aurora.client.commands.admin.AuroraClientAPI', new=Mock(spec=AuroraClientAPI)),
+          patch('apache.aurora.client.commands.admin.print_results'),
+          patch('apache.aurora.client.commands.admin.CLUSTERS', new=self.TEST_CLUSTERS),
+          patch('twitter.common.app.get_options', return_value=mock_options)
+      ) as (
+          mock_api,
+          mock_print_results,
+          test_clusters,
+          mock_options):
+
+        mock_api.return_value.sla_get_safe_domain_vector.return_value = mock_vector
+
+        sla_list_safe_domain(['west', '50', '100s'])
+
+        mock_vector.get_safe_hosts.assert_called_once_with(50.0, 100.0, {})
+        mock_print_results.assert_called_once_with(['h1'])
+
+  def test_safe_domain_override_jobs(self):
+    """Test successful execution of the sla_list_safe_domain command with override_jobs option."""
+    mock_vector = self.create_mock_vector(self.create_hosts(3, 80, 100))
+    with temporary_file() as fp:
+      fp.write('west/role/env/job1 30 200s')
+      fp.flush()
+      mock_options = self.setup_mock_options(override=fp.name)
+      with contextlib.nested(
+          patch('apache.aurora.client.commands.admin.AuroraClientAPI', new=Mock(spec=AuroraClientAPI)),
+          patch('apache.aurora.client.commands.admin.print_results'),
+          patch('apache.aurora.client.commands.admin.CLUSTERS', new=self.TEST_CLUSTERS),
+          patch('twitter.common.app.get_options', return_value=mock_options)
+      ) as (
+          mock_api,
+          mock_print_results,
+          test_clusters,
+          mock_options):
+
+        mock_api.return_value.sla_get_safe_domain_vector.return_value = mock_vector
+
+        sla_list_safe_domain(['west', '50', '100s'])
+
+        job_key = AuroraJobKey.from_path('west/role/env/job1')
+        override = {job_key: DomainUpTimeSlaVector.JobUpTimeLimit(job_key, 30, 200)}
+        mock_vector.get_safe_hosts.assert_called_once_with(50.0, 100.0, override)
+        mock_print_results.assert_called_once_with(['h0', 'h1', 'h2'])
+
+  def test_safe_domain_list_jobs(self):
+    """Tests successful execution of the sla_list_safe_domain command with list_jobs option."""
+    mock_options = self.setup_mock_options(list_jobs=True)
+    mock_vector = self.create_mock_vector(self.create_hosts(3, 50, 100))
+    with contextlib.nested(
+        patch('apache.aurora.client.commands.admin.AuroraClientAPI', new=Mock(spec=AuroraClientAPI)),
+        patch('apache.aurora.client.commands.admin.print_results'),
+        patch('apache.aurora.client.commands.admin.CLUSTERS', new=self.TEST_CLUSTERS),
+        patch('twitter.common.app.get_options', return_value=mock_options)
+    ) as (
+        mock_api,
+        mock_print_results,
+        test_clusters,
+        mock_options):
+
+      mock_api.return_value.sla_get_safe_domain_vector.return_value = mock_vector
+      sla_list_safe_domain(['west', '50', '100s'])
+
+      mock_vector.get_safe_hosts.assert_called_once_with(50.0, 100.0, {})
+      mock_print_results.assert_called_once_with([
+          'h0 west/role/env/job0 50.00 100',
+          'h1 west/role/env/job1 50.00 100',
+          'h2 west/role/env/job2 50.00 100'])
+
+  def test_safe_domain_invalid_percentage(self):
+    """Tests execution of the sla_list_safe_domain command with invalid percentage"""
+    mock_options = self.setup_mock_options()
+    with patch('twitter.common.app.get_options', return_value=mock_options) as (mock_options):
+
+      try:
+        sla_list_safe_domain(['west', '0', '100s'])
+      except SystemExit:
+        pass
+      else:
+        assert 'Expected error is not raised.'
+
+  def test_safe_domain_malformed_job_override(self):
+    """Tests execution of the sla_list_safe_domain command with invalid job_override file"""
+    with temporary_file() as fp:
+      fp.write('30 200s')
+      fp.flush()
+      mock_options = self.setup_mock_options(override=fp.name)
+      with patch('twitter.common.app.get_options', return_value=mock_options) as (mock_options):
+
+        try:
+          sla_list_safe_domain(['west', '50', '100s'])
+        except SystemExit:
+          pass
+        else:
+          assert 'Expected error is not raised.'

http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/95459d80/src/test/python/apache/aurora/client/commands/util.py
----------------------------------------------------------------------
diff --git a/src/test/python/apache/aurora/client/commands/util.py b/src/test/python/apache/aurora/client/commands/util.py
index 11ae990..82cadcf 100644
--- a/src/test/python/apache/aurora/client/commands/util.py
+++ b/src/test/python/apache/aurora/client/commands/util.py
@@ -54,7 +54,6 @@ class AuroraClientCommandTest(unittest.TestCase):
     # what API calls get made against the scheduler, and both of these objects
     # delegate calls to the scheduler. It doesn't matter which one is used:
     # what we care about is that the right API calls get made.
-    mock_api = Mock(spec=HookedAuroraClientAPI)
     mock_scheduler_proxy = Mock()
     mock_scheduler_proxy.url = "http://something_or_other"
     mock_scheduler_proxy.scheduler_client.return_value = mock_scheduler_proxy


Mime
View raw message