ambari-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From smoha...@apache.org
Subject ambari git commit: AMBARI-11539. Auto recovery should recover components to INSTALLED if desired state is INSTALLED but the component instance is running (smohanty)
Date Fri, 29 May 2015 17:36:44 GMT
Repository: ambari
Updated Branches:
  refs/heads/trunk c2764be2f -> 3c563d3b2


AMBARI-11539. Auto recovery should recover components to INSTALLED if desired state is INSTALLED
but the component instance is running (smohanty)


Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/3c563d3b
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/3c563d3b
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/3c563d3b

Branch: refs/heads/trunk
Commit: 3c563d3b282ab7a552f9dc95b84d6e2a13414166
Parents: c2764be
Author: Sumit Mohanty <smohanty@hortonworks.com>
Authored: Fri May 29 10:36:34 2015 -0700
Committer: Sumit Mohanty <smohanty@hortonworks.com>
Committed: Fri May 29 10:36:34 2015 -0700

----------------------------------------------------------------------
 .../src/main/python/ambari_agent/ActionQueue.py |  2 +-
 .../main/python/ambari_agent/RecoveryManager.py | 27 ++++++++++++++++++--
 .../python/ambari_agent/TestRecoveryManager.py  |  9 ++++++-
 3 files changed, 34 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/ambari/blob/3c563d3b/ambari-agent/src/main/python/ambari_agent/ActionQueue.py
----------------------------------------------------------------------
diff --git a/ambari-agent/src/main/python/ambari_agent/ActionQueue.py b/ambari-agent/src/main/python/ambari_agent/ActionQueue.py
index c4c84a8..d7880d0 100644
--- a/ambari-agent/src/main/python/ambari_agent/ActionQueue.py
+++ b/ambari-agent/src/main/python/ambari_agent/ActionQueue.py
@@ -422,7 +422,7 @@ class ActionQueue(threading.Thread):
       else:
         component_status = LiveStatus.DEAD_STATUS
         self.controller.recovery_manager.update_current_status(component, component_status)
-        request_execution_cmd = self.controller.recovery_manager.requires_recovery(component)
+      request_execution_cmd = self.controller.recovery_manager.requires_recovery(component)
 
       if component_status_result.has_key('structuredOut'):
         component_extra = component_status_result['structuredOut']

http://git-wip-us.apache.org/repos/asf/ambari/blob/3c563d3b/ambari-agent/src/main/python/ambari_agent/RecoveryManager.py
----------------------------------------------------------------------
diff --git a/ambari-agent/src/main/python/ambari_agent/RecoveryManager.py b/ambari-agent/src/main/python/ambari_agent/RecoveryManager.py
index 83206ee..d0e8a2c 100644
--- a/ambari-agent/src/main/python/ambari_agent/RecoveryManager.py
+++ b/ambari-agent/src/main/python/ambari_agent/RecoveryManager.py
@@ -194,6 +194,8 @@ class RecoveryManager:
       status = self.statuses[component]
       if status["current"] == status["desired"]:
         return False
+      if status["desired"] not in self.allowed_desired_states:
+        return False
     else:
       status = self.statuses[component]
       if status["current"] == status["desired"] and status['stale_config'] == False:
@@ -279,7 +281,8 @@ class RecoveryManager:
             elif status["desired"] == self.INSTALLED:
               if status["current"] == self.INIT:
                 command = self.get_install_command(component)
-              # else issue a STOP command
+              elif status["current"] == self.STARTED:
+                command = self.get_stop_command(component)
           else:
             if status["current"] == self.INSTALLED:
               command = self.get_install_command(component)
@@ -523,7 +526,7 @@ class RecoveryManager:
       for command in commands:
         if self.COMMAND_TYPE in command and command[self.COMMAND_TYPE] == ActionQueue.EXECUTION_COMMAND:
           if self.ROLE in command:
-            if command[self.ROLE_COMMAND] == ActionQueue.ROLE_COMMAND_INSTALL:
+            if command[self.ROLE_COMMAND] in (ActionQueue.ROLE_COMMAND_INSTALL, ActionQueue.ROLE_COMMAND_STOP):
               self.update_desired_status(command[self.ROLE], LiveStatus.DEAD_STATUS)
             if command[self.ROLE_COMMAND] == ActionQueue.ROLE_COMMAND_START:
               self.update_desired_status(command[self.ROLE], LiveStatus.LIVE_STATUS)
@@ -580,6 +583,26 @@ class RecoveryManager:
     return None
     pass
 
+  def get_stop_command(self, component):
+    if self.paused:
+      logger.info("Recovery is paused, likely tasks waiting in pipeline for this host.")
+      return None
+
+    if self.enabled():
+      logger.debug("Using stored STOP command for %s", component)
+      if self.command_exists(component, ActionQueue.EXECUTION_COMMAND):
+        command = copy.deepcopy(self.stored_exec_commands[component])
+        command[self.ROLE_COMMAND] = "STOP"
+        command[self.COMMAND_TYPE] = ActionQueue.AUTO_EXECUTION_COMMAND
+        command[self.TASK_ID] = self.get_unique_task_id()
+        return command
+      else:
+        logger.info("STOP command cannot be computed as details are not received from Server.")
+    else:
+      logger.info("Recovery is not enabled. STOP command will not be computed.")
+    return None
+    pass
+
   def get_restart_command(self, component):
     if self.paused:
       logger.info("Recovery is paused, likely tasks waiting in pipeline for this host.")

http://git-wip-us.apache.org/repos/asf/ambari/blob/3c563d3b/ambari-agent/src/test/python/ambari_agent/TestRecoveryManager.py
----------------------------------------------------------------------
diff --git a/ambari-agent/src/test/python/ambari_agent/TestRecoveryManager.py b/ambari-agent/src/test/python/ambari_agent/TestRecoveryManager.py
index aaf6e53..cb632d7 100644
--- a/ambari-agent/src/test/python/ambari_agent/TestRecoveryManager.py
+++ b/ambari-agent/src/test/python/ambari_agent/TestRecoveryManager.py
@@ -316,7 +316,8 @@ class TestRecoveryManager(TestCase):
        1200, 1201, 1203,
        4000, 4001, 4002, 4003,
        4100, 4101, 4102, 4103,
-       4200, 4201, 4202]
+       4200, 4201, 4202,
+       4300, 4301, 4302]
     rm = RecoveryManager(True)
     rm.update_config(15, 5, 1, 16, True, False)
 
@@ -383,6 +384,12 @@ class TestRecoveryManager(TestCase):
     self.assertEqual(1, len(commands))
     self.assertEqual("CUSTOM_COMMAND", commands[0]["roleCommand"])
     self.assertEqual("RESTART", commands[0]["hostLevelParams"]["custom_command"])
+
+    rm.update_current_status("NODEMANAGER", "STARTED")
+    rm.update_desired_status("NODEMANAGER", "INSTALLED")
+    commands = rm.get_recovery_commands()
+    self.assertEqual(1, len(commands))
+    self.assertEqual("STOP", commands[0]["roleCommand"])
     pass
 
   @patch.object(RecoveryManager, "update_config")


Mime
View raw message