aurora-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ma...@apache.org
Subject git commit: Adding "sla get_task_up_count" command.
Date Fri, 14 Feb 2014 18:10:34 GMT
Updated Branches:
  refs/heads/master 5acc49410 -> 8c81e5104


Adding "sla get_task_up_count" command.

Bugs closed: AURORA-206

Reviewed at https://reviews.apache.org/r/18042/


Project: http://git-wip-us.apache.org/repos/asf/incubator-aurora/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-aurora/commit/8c81e510
Tree: http://git-wip-us.apache.org/repos/asf/incubator-aurora/tree/8c81e510
Diff: http://git-wip-us.apache.org/repos/asf/incubator-aurora/diff/8c81e510

Branch: refs/heads/master
Commit: 8c81e5104974b3f23a246e46c9763c9e9f54f639
Parents: 5acc494
Author: Maxim Khutornenko <maxim@apache.org>
Authored: Fri Feb 14 10:01:49 2014 -0800
Committer: Maxim Khutornenko <maxim@apache.org>
Committed: Fri Feb 14 10:01:49 2014 -0800

----------------------------------------------------------------------
 src/main/python/apache/aurora/client/api/BUILD  | 13 +++
 .../python/apache/aurora/client/api/__init__.py |  5 ++
 src/main/python/apache/aurora/client/api/sla.py | 89 ++++++++++++++++++
 src/main/python/apache/aurora/client/cli/BUILD  |  3 +-
 .../python/apache/aurora/client/cli/client.py   |  2 +
 .../python/apache/aurora/client/cli/options.py  | 12 +++
 src/main/python/apache/aurora/client/cli/sla.py | 83 +++++++++++++++++
 src/test/python/apache/aurora/client/api/BUILD  | 10 +++
 .../python/apache/aurora/client/api/test_sla.py | 95 ++++++++++++++++++++
 src/test/python/apache/aurora/client/cli/BUILD  | 15 +++-
 .../python/apache/aurora/client/cli/test_sla.py | 59 ++++++++++++
 11 files changed, 384 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/8c81e510/src/main/python/apache/aurora/client/api/BUILD
----------------------------------------------------------------------
diff --git a/src/main/python/apache/aurora/client/api/BUILD b/src/main/python/apache/aurora/client/api/BUILD
index 69229a8..32097d2 100644
--- a/src/main/python/apache/aurora/client/api/BUILD
+++ b/src/main/python/apache/aurora/client/api/BUILD
@@ -20,6 +20,7 @@ python_library(
   dependencies = [
     pants(':restarter'),
     pants(':scheduler_client'),
+    pants(':sla'),
     pants(':updater'),
     pants('3rdparty/python:twitter.common.lang'),
     pants('3rdparty/python:twitter.common.log'),
@@ -109,6 +110,18 @@ python_library(
 )
 
 python_library(
+  name = 'sla',
+  sources = ['sla.py'],
+  dependencies = [
+    pants(':scheduler_client'),
+    pants('3rdparty/python:twitter.common.log'),
+    pants('src/main/python/apache/aurora/client:base'),
+    pants('src/main/python/apache/aurora/common'),
+    pants('src/main/thrift/org/apache/aurora/gen:py-thrift'),
+  ]
+)
+
+python_library(
   name = 'updater',
   sources = ['updater.py'],
   dependencies = [

http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/8c81e510/src/main/python/apache/aurora/client/api/__init__.py
----------------------------------------------------------------------
diff --git a/src/main/python/apache/aurora/client/api/__init__.py b/src/main/python/apache/aurora/client/api/__init__.py
index 8bc05e2..0999b6f 100644
--- a/src/main/python/apache/aurora/client/api/__init__.py
+++ b/src/main/python/apache/aurora/client/api/__init__.py
@@ -30,6 +30,7 @@ from gen.apache.aurora.ttypes import (
 
 from .restarter import Restarter
 from .scheduler_client import SchedulerProxy
+from .sla import Sla
 from .updater import Updater
 
 
@@ -194,6 +195,10 @@ class AuroraClientAPI(object):
   def unsafe_rewrite_config(self, rewrite_request):
     return self._scheduler_proxy.rewriteConfigs(rewrite_request)
 
+  def sla_get_job_uptime_vector(self, job_key):
+    self._assert_valid_job_key(job_key)
+    return Sla(self._scheduler_proxy).get_job_uptime_vector(job_key)
+
   def _assert_valid_job_key(self, job_key):
     if not isinstance(job_key, AuroraJobKey):
       raise self.TypeError('Invalid job_key %r: expected %s but got %s'

http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/8c81e510/src/main/python/apache/aurora/client/api/sla.py
----------------------------------------------------------------------
diff --git a/src/main/python/apache/aurora/client/api/sla.py b/src/main/python/apache/aurora/client/api/sla.py
new file mode 100644
index 0000000..b69d208
--- /dev/null
+++ b/src/main/python/apache/aurora/client/api/sla.py
@@ -0,0 +1,89 @@
+#
+# Copyright 2014 Apache Software Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import math
+import time
+
+from apache.aurora.client.base import check_and_log_response
+
+from gen.apache.aurora.constants import ACTIVE_STATES
+from gen.apache.aurora.ttypes import (
+  Identity,
+  JobKey,
+  Quota,
+  Response,
+  ResponseCode,
+  ScheduleStatus,
+  TaskQuery
+)
+
+
+class JobUpTimeSlaVector(object):
+  """Converts job tasks into SLA vector data: a list of instance uptimes.
+     Exposes an API for converting raw instance uptime data into job SLA metrics.
+  """
+
+  def __init__(self, tasks):
+    self._tasks = tasks
+    self._now = time.time()
+    self._uptime_map = self._instance_uptime()
+
+  def get_task_up_count(self, duration):
+    """Returns the percentage of job tasks that stayed up longer than duration.
+
+    Arguments:
+    duration -- uptime duration in seconds.
+    """
+    total = len(self._uptime_map)
+    above = len([uptime for uptime in self._uptime_map.values() if uptime >= duration])
+    return 0 if not total else 100.0 * above / total
+
+  def _instance_uptime(self):
+    instance_map = {}
+    for task in self._tasks:
+      for event in task.taskEvents:
+        if event.status == ScheduleStatus.STARTING:
+          instance_map[task.assignedTask.instanceId] = self._now - event.timestamp / 1000
+          break
+    return instance_map
+
+
+class Sla(object):
+  """Defines methods for generating job uptime metrics required for monitoring job SLA."""
+
+  def __init__(self, scheduler):
+    self._scheduler = scheduler
+
+  def get_job_uptime_vector(self, job_key):
+    """Returns a JobUpTimeSlaVector object for the given job key.
+
+    Arguments:
+    job_key -- job to create a task uptime vector for.
+    """
+    return JobUpTimeSlaVector(self._get_tasks(self._create_task_query(job_key=job_key)))
+
+  def _get_tasks(self, task_query):
+    resp = self._scheduler.getTasksStatus(task_query)
+    check_and_log_response(resp)
+    return resp.result.scheduleStatusResult.tasks
+
+  def _create_task_query(self, job_key=None, host=None):
+    return TaskQuery(
+        owner=Identity(role=job_key.role) if job_key else None,
+        environment=job_key.env if job_key else None,
+        jobName=job_key.name if job_key else None,
+        slaveHost=host,
+        statuses=ACTIVE_STATES)

http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/8c81e510/src/main/python/apache/aurora/client/cli/BUILD
----------------------------------------------------------------------
diff --git a/src/main/python/apache/aurora/client/cli/BUILD b/src/main/python/apache/aurora/client/cli/BUILD
index f5c9ae7..7df9dee 100644
--- a/src/main/python/apache/aurora/client/cli/BUILD
+++ b/src/main/python/apache/aurora/client/cli/BUILD
@@ -39,10 +39,11 @@ python_library(
 
 python_library(
   name='cli',
-  sources = [ '__init__.py', 'jobs.py', 'quota.py', 'context.py', 'options.py' ],
+  sources = [ '__init__.py', 'jobs.py', 'quota.py', 'context.py', 'options.py', 'sla.py' ],
   dependencies = [
     pants('3rdparty/python:argparse'),
     pants('3rdparty/python:twitter.common.python'),
+    pants('3rdparty/python:twitter.common.quantity'),
     pants('src/main/python/apache/aurora/client/api:command_runner'),
     pants('src/main/python/apache/aurora/client/api:disambiguator'),
     pants('src/main/python/apache/aurora/client/api:job_monitor'),

http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/8c81e510/src/main/python/apache/aurora/client/cli/client.py
----------------------------------------------------------------------
diff --git a/src/main/python/apache/aurora/client/cli/client.py b/src/main/python/apache/aurora/client/cli/client.py
index e416d38..3c07c94 100644
--- a/src/main/python/apache/aurora/client/cli/client.py
+++ b/src/main/python/apache/aurora/client/cli/client.py
@@ -17,6 +17,8 @@ class AuroraCommandLine(CommandLine):
     self.register_noun(Job())
     from apache.aurora.client.cli.quota import Quota
     self.register_noun(Quota())
+    from apache.aurora.client.cli.sla import Sla
+    self.register_noun(Sla())
 
 
 class AuroraClientV2CommandProcessor(CommandProcessor):

http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/8c81e510/src/main/python/apache/aurora/client/cli/options.py
----------------------------------------------------------------------
diff --git a/src/main/python/apache/aurora/client/cli/options.py b/src/main/python/apache/aurora/client/cli/options.py
index 5d6eba2..017f141 100644
--- a/src/main/python/apache/aurora/client/cli/options.py
+++ b/src/main/python/apache/aurora/client/cli/options.py
@@ -17,6 +17,8 @@
 from apache.aurora.client.cli import CommandOption
 from apache.aurora.common.aurora_job_key import AuroraJobKey
 
+from twitter.common.quantity.parse_simple import parse_time
+
 
 def parse_qualified_role(rolestr):
   if rolestr is None:
@@ -42,6 +44,16 @@ def parse_instances(instances):
     result.update(range(int(x[0]), int(x[-1]) + 1))
   return sorted(result)
 
+def parse_time_values(time_values):
+  """Parse lists of discrete time values. Every value must be in the following format: XdYhZmWs.
+     Examples:
+       15m
+       1m,1d,3h25m,2h4m15s
+  """
+  if time_values is None or time_values == '':
+    return None
+  return sorted(map(parse_time, time_values.split(',')))
+
 
 BATCH_OPTION = CommandOption('--batch_size', type=int, default=5,
         help='Number of instances to be operate on in one iteration')

http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/8c81e510/src/main/python/apache/aurora/client/cli/sla.py
----------------------------------------------------------------------
diff --git a/src/main/python/apache/aurora/client/cli/sla.py b/src/main/python/apache/aurora/client/cli/sla.py
new file mode 100644
index 0000000..a3973df
--- /dev/null
+++ b/src/main/python/apache/aurora/client/cli/sla.py
@@ -0,0 +1,83 @@
+#
+# Copyright 2014 Apache Software Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from apache.aurora.client.cli import (
+    EXIT_OK,
+    Noun,
+    Verb,
+)
+from apache.aurora.client.cli.context import AuroraCommandContext
+from apache.aurora.client.cli.options import JOBSPEC_ARGUMENT, parse_time_values
+
+from twitter.common.quantity import Time
+
+
+class GetTaskUpCountCmd(Verb):
+  @property
+  def name(self):
+    return 'get_task_up_count'
+
+  @property
+  def help(self):
+    return """Usage: aurora sla get_task_up_count cluster/role/env/job [--duration]
+
+    Prints the percentage of tasks that stayed up within the last "duration" s|m|h|d.
+    If duration is not specified prints a histogram-like log-scale distribution
+    of task uptime percentages.
+    """
+
+  @classmethod
+  def render_get_task_up_count(cls, context, vector):
+    def format_output(durations):
+      return ['%s\t- %.2f%%' % (duration, vector.get_task_up_count(duration.as_(Time.SECONDS)))
+          for duration in durations]
+
+    durations = context.options.durations or parse_time_values('1m,10m,1h,12h,7d')
+    return '\n'.join(format_output(durations))
+
+
+  def setup_options_parser(self, parser):
+    self.add_option(parser, JOBSPEC_ARGUMENT)
+    parser.add_argument('--durations', type=parse_time_values, default=None,
+        help='Durations to report uptime for.'
+             'Format: XdYhZmWs (each field optional but must be in that order.)'
+             'Examples: '
+             '  --durations=1d'
+             '  --durations=3m,10s,1h3m10s')
+
+  def execute(self, context):
+    api = context.get_api(context.options.jobspec.cluster)
+    vector = api.sla_get_job_uptime_vector(context.options.jobspec)
+    context.print_out(self.render_get_task_up_count(context, vector))
+    return EXIT_OK
+
+
+class Sla(Noun):
+  @property
+  def name(self):
+    return 'sla'
+
+  @property
+  def help(self):
+    return 'Work with SLA data in Aurora cluster.'
+
+  @classmethod
+  def create_context(cls):
+    return AuroraCommandContext()
+
+  def __init__(self):
+    super(Sla, self).__init__()
+    self.register_verb(GetTaskUpCountCmd())

http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/8c81e510/src/test/python/apache/aurora/client/api/BUILD
----------------------------------------------------------------------
diff --git a/src/test/python/apache/aurora/client/api/BUILD b/src/test/python/apache/aurora/client/api/BUILD
index 9833535..dd9b797 100644
--- a/src/test/python/apache/aurora/client/api/BUILD
+++ b/src/test/python/apache/aurora/client/api/BUILD
@@ -23,6 +23,7 @@ python_test_suite(name = 'all',
     pants(':instance_watcher'),
     pants(':updater'),
     pants(':quota_check'),
+    pants(':sla'),
   ],
 )
 
@@ -85,6 +86,15 @@ python_tests(name = 'quota_check',
   ]
 )
 
+python_tests(name = 'sla',
+  sources = ['test_sla.py'],
+  dependencies = [
+    pants('3rdparty/python:mock'),
+    pants('src/main/python/apache/aurora/client/api:sla'),
+    pants('src/main/thrift/org/apache/aurora/gen:py-thrift'),
+  ]
+)
+
 python_tests(name = 'updater',
   sources = ['test_updater.py'],
   dependencies = [

http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/8c81e510/src/test/python/apache/aurora/client/api/test_sla.py
----------------------------------------------------------------------
diff --git a/src/test/python/apache/aurora/client/api/test_sla.py b/src/test/python/apache/aurora/client/api/test_sla.py
new file mode 100644
index 0000000..54f904c
--- /dev/null
+++ b/src/test/python/apache/aurora/client/api/test_sla.py
@@ -0,0 +1,95 @@
+#
+# Copyright 2014 Apache Software Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import unittest
+import time
+
+from apache.aurora.client.api.sla import Sla, JobUpTimeSlaVector
+from apache.aurora.common.aurora_job_key import AuroraJobKey
+
+from gen.apache.aurora.AuroraSchedulerManager import Client as scheduler_client
+from gen.apache.aurora.constants import ACTIVE_STATES
+from gen.apache.aurora.ttypes import (
+    AssignedTask,
+    Identity,
+    Quota,
+    Response,
+    ResponseCode,
+    Result,
+    ScheduleStatus,
+    ScheduleStatusResult,
+    ScheduledTask,
+    TaskConfig,
+    TaskEvent,
+    TaskQuery
+)
+
+from mock import Mock
+
+
+class SlaTest(unittest.TestCase):
+  def setUp(self):
+    self._scheduler = Mock()
+    self._sla = Sla(self._scheduler)
+    self._role = 'mesos'
+    self._name = 'job'
+    self._env = 'test'
+    self._job_key = AuroraJobKey('foo', self._role, self._env, self._name)
+
+  def mock_get_tasks(self, tasks, response_code=None):
+    response_code = ResponseCode.OK if response_code is None else response_code
+    resp = Response(responseCode=response_code, message='test')
+    resp.result = Result(scheduleStatusResult=ScheduleStatusResult(tasks=tasks))
+    self._scheduler.getTasksStatus.return_value = resp
+
+  def create_task(self, duration, id):
+    return ScheduledTask(
+        assignedTask=AssignedTask(instanceId=id, task=TaskConfig(production=True)),
+        status=ScheduleStatus.RUNNING,
+        taskEvents=[TaskEvent(
+            status=ScheduleStatus.STARTING,
+            timestamp=(time.time() - duration) * 1000)]
+    )
+
+  def create_tasks(self, durations):
+    return [self.create_task(duration, index) for index, duration in enumerate(durations)]
+
+
+  def assert_count_result(self, percentage, duration):
+    vector = self._sla.get_job_uptime_vector(self._job_key)
+    actual = vector.get_task_up_count(duration)
+    assert percentage == actual, (
+        'Expected percentage:%s Actual percentage:%s' % (percentage, actual)
+    )
+
+    self._scheduler.getTasksStatus.assert_called_once_with(
+        TaskQuery(
+            owner=Identity(role=self._role),
+            environment=self._env,
+            jobName=self._name,
+            statuses=ACTIVE_STATES))
+
+  def test_count_0(self):
+    self.mock_get_tasks([])
+    self.assert_count_result(0, 0)
+
+  def test_count_50(self):
+    self.mock_get_tasks(self.create_tasks([600, 900, 100, 200]))
+    self.assert_count_result(50, 300)
+
+  def test_count_100(self):
+    self.mock_get_tasks(self.create_tasks([100, 200, 300, 400, 500]))
+    self.assert_count_result(100, 50)

http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/8c81e510/src/test/python/apache/aurora/client/cli/BUILD
----------------------------------------------------------------------
diff --git a/src/test/python/apache/aurora/client/cli/BUILD b/src/test/python/apache/aurora/client/cli/BUILD
index c106b97..636f407 100644
--- a/src/test/python/apache/aurora/client/cli/BUILD
+++ b/src/test/python/apache/aurora/client/cli/BUILD
@@ -16,7 +16,7 @@
 
 python_test_suite(
   name = 'all',
-  dependencies = [ pants(':bridge'), pants(':job'), pants(':quota') ]
+  dependencies = [ pants(':bridge'), pants(':job'), pants(':quota'), pants(':sla') ]
 )
 
 python_library(
@@ -69,3 +69,16 @@ python_tests(
     pants('src/test/python/apache/aurora/client/commands:util')
   ]
 )
+
+python_tests(
+  name = 'sla',
+  sources = [ 'test_sla.py' ],
+  dependencies = [
+    pants(':util'),
+    pants('3rdparty/python:mock'),
+    pants('3rdparty/python:twitter.common.contextutil'),
+    pants('src/main/python/apache/aurora/client/cli'),
+    pants('src/main/python/apache/aurora/client/cli:client'),
+    pants('src/test/python/apache/aurora/client/commands:util')
+  ]
+)

http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/8c81e510/src/test/python/apache/aurora/client/cli/test_sla.py
----------------------------------------------------------------------
diff --git a/src/test/python/apache/aurora/client/cli/test_sla.py b/src/test/python/apache/aurora/client/cli/test_sla.py
new file mode 100644
index 0000000..703d39e
--- /dev/null
+++ b/src/test/python/apache/aurora/client/cli/test_sla.py
@@ -0,0 +1,59 @@
+#
+# Copyright 2014 Apache Software Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import contextlib
+
+from apache.aurora.client.cli.client import AuroraCommandLine
+from apache.aurora.client.cli.util import AuroraClientCommandTest, FakeAuroraCommandContext
+
+from mock import Mock, patch
+
+
+class TestGetTaskUpCountCommand(AuroraClientCommandTest):
+  @classmethod
+  def setup_mock_sla_uptime_vector(cls, mock_context, upcount):
+    api = mock_context.get_api('west')
+    response = Mock()
+    response.get_task_up_count.return_value = upcount
+    api.sla_get_job_uptime_vector.return_value = response
+
+  def test_get_task_up_count_no_duration(self):
+    mock_context = FakeAuroraCommandContext()
+    self.setup_mock_sla_uptime_vector(mock_context, 10.6533333333)
+    with contextlib.nested(
+        patch('apache.aurora.client.cli.sla.Sla.create_context', return_value=mock_context),
+        patch('apache.aurora.client.factory.CLUSTERS', new=self.TEST_CLUSTERS)):
+      cmd = AuroraCommandLine()
+      cmd.execute(['sla', 'get_task_up_count', 'west/role/env/test'])
+      out = '\n'.join(mock_context.get_out())
+      assert '1 mins\t- 10.65%\n' in out
+      assert '10 mins\t- 10.65%\n' in out
+      assert '1 hrs\t- 10.65%\n' in out
+      assert '12 hrs\t- 10.65%\n' in out
+      assert '7 days\t- 10.65%' in out
+
+  def test_get_task_up_count_with_durations(self):
+    mock_context = FakeAuroraCommandContext()
+    self.setup_mock_sla_uptime_vector(mock_context, 95.3577434734)
+    with contextlib.nested(
+        patch('apache.aurora.client.cli.sla.Sla.create_context', return_value=mock_context),
+        patch('apache.aurora.client.factory.CLUSTERS', new=self.TEST_CLUSTERS)):
+      cmd = AuroraCommandLine()
+      cmd.execute(['sla', 'get_task_up_count', 'west/role/env/test', '--durations=3m,2d6h,3h'])
+      out = '\n'.join(mock_context.get_out())
+      assert '3 mins\t- 95.36%' in out
+      assert '54 hrs\t- 95.36%' in out
+      assert '3 hrs\t- 95.36%' in out


Mime
View raw message