ambari-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From smoha...@apache.org
Subject ambari git commit: AMBARI-13463. Auto start should allow selection of components that can be auto-started (smohanty)
Date Mon, 19 Oct 2015 01:33:31 GMT
Repository: ambari
Updated Branches:
  refs/heads/trunk 4017f064c -> 4ba6aceb2


AMBARI-13463. Auto start should allow selection of components that can be auto-started (smohanty)


Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/4ba6aceb
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/4ba6aceb
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/4ba6aceb

Branch: refs/heads/trunk
Commit: 4ba6aceb270037a6e1c33db3ed394b13e2add05c
Parents: 4017f06
Author: Sumit Mohanty <smohanty@hortonworks.com>
Authored: Sun Oct 18 18:32:44 2015 -0700
Committer: Sumit Mohanty <smohanty@hortonworks.com>
Committed: Sun Oct 18 18:33:18 2015 -0700

----------------------------------------------------------------------
 .../src/main/python/ambari_agent/ActionQueue.py |  11 +-
 .../main/python/ambari_agent/RecoveryManager.py |  70 +++++++++--
 .../test/python/ambari_agent/TestActionQueue.py |   2 +-
 .../python/ambari_agent/TestRecoveryManager.py  | 123 +++++++++++++++----
 .../ambari/server/agent/HeartBeatHandler.java   |   2 +-
 .../ambari/server/agent/RecoveryConfig.java     |  26 ++++
 .../server/configuration/Configuration.java     |  30 ++++-
 7 files changed, 221 insertions(+), 43 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/ambari/blob/4ba6aceb/ambari-agent/src/main/python/ambari_agent/ActionQueue.py
----------------------------------------------------------------------
diff --git a/ambari-agent/src/main/python/ambari_agent/ActionQueue.py b/ambari-agent/src/main/python/ambari_agent/ActionQueue.py
index 6a64f99..b82afe8 100644
--- a/ambari-agent/src/main/python/ambari_agent/ActionQueue.py
+++ b/ambari-agent/src/main/python/ambari_agent/ActionQueue.py
@@ -326,7 +326,8 @@ class ActionQueue(threading.Thread):
 
     # let recovery manager know the current state
     if status == self.COMPLETED_STATUS:
-      if self.controller.recovery_manager.enabled() and command.has_key('roleCommand'):
+      if self.controller.recovery_manager.enabled() and command.has_key('roleCommand') \
+          and self.controller.recovery_manager.configured_for_recovery(command['role']):
         if command['roleCommand'] == self.ROLE_COMMAND_START:
           self.controller.recovery_manager.update_current_status(command['role'], LiveStatus.LIVE_STATUS)
           self.controller.recovery_manager.update_config_staleness(command['role'], False)
@@ -441,10 +442,14 @@ class ActionQueue(threading.Thread):
 
       if component_status_result['exitcode'] == 0:
         component_status = LiveStatus.LIVE_STATUS
-        self.controller.recovery_manager.update_current_status(component, component_status)
+        if self.controller.recovery_manager.enabled() \
+            and self.controller.recovery_manager.configured_for_recovery(component):
+          self.controller.recovery_manager.update_current_status(component, component_status)
       else:
         component_status = LiveStatus.DEAD_STATUS
-        self.controller.recovery_manager.update_current_status(component, component_status)
+        if self.controller.recovery_manager.enabled() \
+            and self.controller.recovery_manager.configured_for_recovery(component):
+          self.controller.recovery_manager.update_current_status(component, component_status)
       request_execution_cmd = self.controller.recovery_manager.requires_recovery(component)
 
       if component_status_result.has_key('structuredOut'):

http://git-wip-us.apache.org/repos/asf/ambari/blob/4ba6aceb/ambari-agent/src/main/python/ambari_agent/RecoveryManager.py
----------------------------------------------------------------------
diff --git a/ambari-agent/src/main/python/ambari_agent/RecoveryManager.py b/ambari-agent/src/main/python/ambari_agent/RecoveryManager.py
index 595c4fc..cab81f5 100644
--- a/ambari-agent/src/main/python/ambari_agent/RecoveryManager.py
+++ b/ambari-agent/src/main/python/ambari_agent/RecoveryManager.py
@@ -85,6 +85,8 @@ class RecoveryManager:
     self.id = int(time.time())
     self.allowed_desired_states = [self.STARTED, self.INSTALLED]
     self.allowed_current_states = [self.INIT, self.INSTALLED]
+    self.enabled_components = []
+    self.disabled_components = []
     self.actions = {}
     self.statuses = {}
     self.__status_lock = threading.RLock()
@@ -93,7 +95,7 @@ class RecoveryManager:
     self.active_command_count = 0
     self.paused = False
 
-    self.update_config(6, 60, 5, 12, recovery_enabled, auto_start_only)
+    self.update_config(6, 60, 5, 12, recovery_enabled, auto_start_only, "", "")
 
     pass
 
@@ -186,6 +188,21 @@ class RecoveryManager:
       self.remove_command(component)
     pass
 
+  """
+  Whether specific components are enabled/disabled for recovery. Being enabled takes
+  precedence over being disabled. When specific components are enabled then only
+  those components are enabled. When specific components are disabled then all of
+  the other components are enabled.
+  """
+  def configured_for_recovery(self, component):
+    if len(self.disabled_components) == 0 and len(self.enabled_components) == 0:
+      return True
+    if len(self.disabled_components) > 0 and component not in self.disabled_components \
+        and len(self.enabled_components) == 0:
+      return True
+    if len(self.enabled_components) > 0 and component in self.enabled_components:
+      return True
+    return False
 
   def requires_recovery(self, component):
     """
@@ -197,6 +214,9 @@ class RecoveryManager:
     if not self.enabled():
       return False
 
+    if not self.configured_for_recovery(component):
+      return False
+
     if component not in self.statuses:
       return False
 
@@ -433,7 +453,9 @@ class RecoveryManager:
       "type" : "DEFAULT|AUTO_START|FULL",
       "maxCount" : 10,
       "windowInMinutes" : 60,
-      "retryGap" : 0 }
+      "retryGap" : 0,
+      "disabledComponents" : "a,b",
+      "enabledComponents" : "c,d"}
     """
 
     recovery_enabled = False
@@ -442,8 +464,12 @@ class RecoveryManager:
     window_in_min = 60
     retry_gap = 5
     max_lifetime_count = 12
+    enabled_components = ""
+    disabled_components = ""
+
 
     if reg_resp and "recoveryConfig" in reg_resp:
+      logger.info("RecoverConfig = " + pprint.pformat(reg_resp["recoveryConfig"]))
       config = reg_resp["recoveryConfig"]
       if "type" in config:
         if config["type"] in ["AUTO_START", "FULL"]:
@@ -458,11 +484,18 @@ class RecoveryManager:
         retry_gap = self._read_int_(config["retryGap"], retry_gap)
       if 'maxLifetimeCount' in config:
         max_lifetime_count = self._read_int_(config['maxLifetimeCount'], max_lifetime_count)
-    self.update_config(max_count, window_in_min, retry_gap, max_lifetime_count, recovery_enabled, auto_start_only)
+
+      if 'enabledComponents' in config:
+        enabled_components = config['enabledComponents']
+      if 'disabledComponents' in config:
+        disabled_components = config['disabledComponents']
+    self.update_config(max_count, window_in_min, retry_gap, max_lifetime_count, recovery_enabled, auto_start_only,
+                       enabled_components, disabled_components)
     pass
 
 
-  def update_config(self, max_count, window_in_min, retry_gap, max_lifetime_count, recovery_enabled, auto_start_only):
+  def update_config(self, max_count, window_in_min, retry_gap, max_lifetime_count, recovery_enabled,
+                    auto_start_only, enabled_components, disabled_components):
     """
     Update recovery configuration, recovery is disabled if configuration values
     are not correct
@@ -493,6 +526,8 @@ class RecoveryManager:
     self.retry_gap_in_sec = retry_gap * 60
     self.auto_start_only = auto_start_only
     self.max_lifetime_count = max_lifetime_count
+    self.disabled_components = []
+    self.enabled_components = []
 
     self.allowed_desired_states = [self.STARTED, self.INSTALLED]
     self.allowed_current_states = [self.INIT, self.INSTALLED, self.STARTED]
@@ -501,11 +536,25 @@ class RecoveryManager:
       self.allowed_desired_states = [self.STARTED]
       self.allowed_current_states = [self.INSTALLED]
 
+    if enabled_components is not None and len(enabled_components) > 0:
+      components = enabled_components.split(",")
+      for component in components:
+        if len(component.strip()) > 0:
+          self.enabled_components.append(component.strip())
+
+    if disabled_components is not None and len(disabled_components) > 0:
+      components = disabled_components.split(",")
+      for component in components:
+        if len(component.strip()) > 0:
+          self.disabled_components.append(component.strip())
+
     self.recovery_enabled = recovery_enabled
     if self.recovery_enabled:
       logger.info(
-        "==> Auto recovery is enabled with maximum %s in %s minutes with gap of %s minutes between and lifetime max being %s.",
-        self.max_count, self.window_in_min, self.retry_gap, self.max_lifetime_count)
+        "==> Auto recovery is enabled with maximum %s in %s minutes with gap of %s minutes between and"
+        " lifetime max being %s. Enabled components - %s and Disabled components - %s",
+        self.max_count, self.window_in_min, self.retry_gap, self.max_lifetime_count,
+        ', '.join(self.enabled_components), ', '.join(self.disabled_components))
     pass
 
 
@@ -536,16 +585,19 @@ class RecoveryManager:
       for command in commands:
         if self.COMMAND_TYPE in command and command[self.COMMAND_TYPE] == ActionQueue.EXECUTION_COMMAND:
           if self.ROLE in command:
-            if command[self.ROLE_COMMAND] in (ActionQueue.ROLE_COMMAND_INSTALL, ActionQueue.ROLE_COMMAND_STOP):
+            if command[self.ROLE_COMMAND] in (ActionQueue.ROLE_COMMAND_INSTALL, ActionQueue.ROLE_COMMAND_STOP) \
+                and self.configured_for_recovery(command[self.ROLE]):
               self.update_desired_status(command[self.ROLE], LiveStatus.DEAD_STATUS)
               logger.info("Received EXECUTION_COMMAND (STOP/INSTALL), desired state of " + command[self.ROLE] + " to " +
                            self.get_desired_status(command[self.ROLE]) )
-            elif command[self.ROLE_COMMAND] == ActionQueue.ROLE_COMMAND_START:
+            elif command[self.ROLE_COMMAND] == ActionQueue.ROLE_COMMAND_START \
+                and self.configured_for_recovery(command[self.ROLE]):
               self.update_desired_status(command[self.ROLE], LiveStatus.LIVE_STATUS)
               logger.info("Received EXECUTION_COMMAND (START), desired state of " + command[self.ROLE] + " to " +
                            self.get_desired_status(command[self.ROLE]) )
             elif command[self.HOST_LEVEL_PARAMS].has_key('custom_command') and \
-                    command[self.HOST_LEVEL_PARAMS]['custom_command'] == ActionQueue.CUSTOM_COMMAND_RESTART:
+                    command[self.HOST_LEVEL_PARAMS]['custom_command'] == ActionQueue.CUSTOM_COMMAND_RESTART \
+                    and self.configured_for_recovery(command[self.ROLE]):
               self.update_desired_status(command[self.ROLE], LiveStatus.LIVE_STATUS)
               logger.info("Received EXECUTION_COMMAND (RESTART), desired state of " + command[self.ROLE] + " to " +
                            self.get_desired_status(command[self.ROLE]) )

http://git-wip-us.apache.org/repos/asf/ambari/blob/4ba6aceb/ambari-agent/src/test/python/ambari_agent/TestActionQueue.py
----------------------------------------------------------------------
diff --git a/ambari-agent/src/test/python/ambari_agent/TestActionQueue.py b/ambari-agent/src/test/python/ambari_agent/TestActionQueue.py
index f5ea107..a583131 100644
--- a/ambari-agent/src/test/python/ambari_agent/TestActionQueue.py
+++ b/ambari-agent/src/test/python/ambari_agent/TestActionQueue.py
@@ -341,7 +341,7 @@ class TestActionQueue(TestCase):
     config.set('agent', 'tolerate_download_failures', "true")
     dummy_controller = MagicMock()
     dummy_controller.recovery_manager = RecoveryManager()
-    dummy_controller.recovery_manager.update_config(5, 5, 1, 11, True, False)
+    dummy_controller.recovery_manager.update_config(5, 5, 1, 11, True, False, "", "")
 
     actionQueue = ActionQueue(config, dummy_controller)
     unfreeze_flag = threading.Event()

http://git-wip-us.apache.org/repos/asf/ambari/blob/4ba6aceb/ambari-agent/src/test/python/ambari_agent/TestRecoveryManager.py
----------------------------------------------------------------------
diff --git a/ambari-agent/src/test/python/ambari_agent/TestRecoveryManager.py b/ambari-agent/src/test/python/ambari_agent/TestRecoveryManager.py
index 1669a2c..e6115e3 100644
--- a/ambari-agent/src/test/python/ambari_agent/TestRecoveryManager.py
+++ b/ambari-agent/src/test/python/ambari_agent/TestRecoveryManager.py
@@ -173,32 +173,32 @@ class TestRecoveryManager(TestCase):
     rm = RecoveryManager(True, False)
     self.assertTrue(rm.enabled())
 
-    rm.update_config(0, 60, 5, 12, True, False)
+    rm.update_config(0, 60, 5, 12, True, False, "", "")
     self.assertFalse(rm.enabled())
 
-    rm.update_config(6, 60, 5, 12, True, False)
+    rm.update_config(6, 60, 5, 12, True, False, "", "")
     self.assertTrue(rm.enabled())
 
-    rm.update_config(6, 0, 5, 12, True, False)
+    rm.update_config(6, 0, 5, 12, True, False, "", "")
     self.assertFalse(rm.enabled())
 
-    rm.update_config(6, 60, 0, 12, True, False)
+    rm.update_config(6, 60, 0, 12, True, False, "", "")
     self.assertFalse(rm.enabled())
 
-    rm.update_config(6, 60, 1, 12, True, False)
+    rm.update_config(6, 60, 1, 12, True, False, None, None)
     self.assertTrue(rm.enabled())
 
-    rm.update_config(6, 60, 61, 12, True, False)
+    rm.update_config(6, 60, 61, 12, True, False, "", None)
     self.assertFalse(rm.enabled())
 
-    rm.update_config(6, 60, 5, 0, True, False)
+    rm.update_config(6, 60, 5, 0, True, False, None, "")
     self.assertFalse(rm.enabled())
 
-    rm.update_config(6, 60, 5, 4, True, False)
+    rm.update_config(6, 60, 5, 4, True, False, "", "")
     self.assertFalse(rm.enabled())
 
     # maximum 2 in 2 minutes and at least 1 minute wait
-    rm.update_config(2, 5, 1, 4, True, False)
+    rm.update_config(2, 5, 1, 4, True, False, "", "")
     self.assertTrue(rm.enabled())
 
     # T = 1000-2
@@ -224,7 +224,7 @@ class TestRecoveryManager(TestCase):
     self.assertFalse(rm.may_execute("NODEMANAGER"))  # too soon
 
     # maximum 2 in 2 minutes and no min wait
-    rm.update_config(2, 5, 1, 5, True, True)
+    rm.update_config(2, 5, 1, 5, True, True, "", "")
 
     # T = 1500-3
     self.assertTrue(rm.execute("NODEMANAGER2"))
@@ -286,6 +286,55 @@ class TestRecoveryManager(TestCase):
     rm.update_current_status("NODEMANAGER", "INSTALLED")
     rm.update_desired_status("NODEMANAGER", "START")
     self.assertFalse(rm.requires_recovery("NODEMANAGER"))
+
+    pass
+
+  def test_recovery_required2(self):
+
+    rm = RecoveryManager(True, True)
+    rm.update_config(15, 5, 1, 16, True, False, "", "")
+    rm.update_current_status("NODEMANAGER", "INSTALLED")
+    rm.update_desired_status("NODEMANAGER", "STARTED")
+    self.assertTrue(rm.requires_recovery("NODEMANAGER"))
+
+    rm = RecoveryManager(True, True)
+    rm.update_config(15, 5, 1, 16, True, False, "NODEMANAGER", "")
+    rm.update_current_status("NODEMANAGER", "INSTALLED")
+    rm.update_desired_status("NODEMANAGER", "STARTED")
+    self.assertTrue(rm.requires_recovery("NODEMANAGER"))
+
+    rm.update_current_status("DATANODE", "INSTALLED")
+    rm.update_desired_status("DATANODE", "STARTED")
+    self.assertFalse(rm.requires_recovery("DATANODE"))
+
+    rm = RecoveryManager(True, True)
+    rm.update_config(15, 5, 1, 16, True, False, "", "NODEMANAGER")
+    rm.update_current_status("NODEMANAGER", "INSTALLED")
+    rm.update_desired_status("NODEMANAGER", "STARTED")
+    self.assertFalse(rm.requires_recovery("NODEMANAGER"))
+
+    rm.update_current_status("DATANODE", "INSTALLED")
+    rm.update_desired_status("DATANODE", "STARTED")
+    self.assertTrue(rm.requires_recovery("DATANODE"))
+
+    rm.update_config(15, 5, 1, 16, True, False, "", "NODEMANAGER")
+    rm.update_config(15, 5, 1, 16, True, False, "NODEMANAGER", "")
+    rm.update_current_status("NODEMANAGER", "INSTALLED")
+    rm.update_desired_status("NODEMANAGER", "STARTED")
+    self.assertTrue(rm.requires_recovery("NODEMANAGER"))
+
+    rm.update_current_status("DATANODE", "INSTALLED")
+    rm.update_desired_status("DATANODE", "STARTED")
+    self.assertFalse(rm.requires_recovery("DATANODE"))
+
+    rm.update_config(15, 5, 1, 16, True, False, "NODEMANAGER", "NODEMANAGER")
+    rm.update_current_status("NODEMANAGER", "INSTALLED")
+    rm.update_desired_status("NODEMANAGER", "STARTED")
+    self.assertTrue(rm.requires_recovery("NODEMANAGER"))
+
+    rm.update_current_status("DATANODE", "INSTALLED")
+    rm.update_desired_status("DATANODE", "STARTED")
+    self.assertFalse(rm.requires_recovery("DATANODE"))
     pass
 
   @patch('time.time', MagicMock(side_effects=[1]))
@@ -343,7 +392,7 @@ class TestRecoveryManager(TestCase):
        4200, 4201, 4202,
        4300, 4301, 4302]
     rm = RecoveryManager(True)
-    rm.update_config(15, 5, 1, 16, True, False)
+    rm.update_config(15, 5, 1, 16, True, False, "", "")
 
     command1 = copy.deepcopy(self.command)
 
@@ -374,14 +423,14 @@ class TestRecoveryManager(TestCase):
     self.assertEqual(1, len(commands))
     self.assertEqual("INSTALL", commands[0]["roleCommand"])
 
-    rm.update_config(2, 5, 1, 5, True, True)
+    rm.update_config(2, 5, 1, 5, True, True, "", "")
     rm.update_current_status("NODEMANAGER", "INIT")
     rm.update_desired_status("NODEMANAGER", "INSTALLED")
 
     commands = rm.get_recovery_commands()
     self.assertEqual(0, len(commands))
 
-    rm.update_config(12, 5, 1, 15, True, False)
+    rm.update_config(12, 5, 1, 15, True, False, "", "")
     rm.update_current_status("NODEMANAGER", "INIT")
     rm.update_desired_status("NODEMANAGER", "INSTALLED")
 
@@ -422,25 +471,25 @@ class TestRecoveryManager(TestCase):
   def test_update_rm_config(self, mock_uc):
     rm = RecoveryManager()
     rm.update_configuration_from_registration(None)
-    mock_uc.assert_has_calls([call(6, 60, 5, 12, False, True)])
+    mock_uc.assert_has_calls([call(6, 60, 5, 12, False, True, "", "")])
 
     mock_uc.reset_mock()
     rm.update_configuration_from_registration({})
-    mock_uc.assert_has_calls([call(6, 60, 5, 12, False, True)])
+    mock_uc.assert_has_calls([call(6, 60, 5, 12, False, True, "", "")])
 
     mock_uc.reset_mock()
     rm.update_configuration_from_registration(
       {"recoveryConfig": {
       "type" : "DEFAULT"}}
     )
-    mock_uc.assert_has_calls([call(6, 60, 5, 12, False, True)])
+    mock_uc.assert_has_calls([call(6, 60, 5, 12, False, True, "", "")])
 
     mock_uc.reset_mock()
     rm.update_configuration_from_registration(
       {"recoveryConfig": {
         "type" : "FULL"}}
     )
-    mock_uc.assert_has_calls([call(6, 60, 5, 12, True, False)])
+    mock_uc.assert_has_calls([call(6, 60, 5, 12, True, False, "", "")])
 
     mock_uc.reset_mock()
     rm.update_configuration_from_registration(
@@ -448,7 +497,7 @@ class TestRecoveryManager(TestCase):
         "type" : "AUTO_START",
         "max_count" : "med"}}
     )
-    mock_uc.assert_has_calls([call(6, 60, 5, 12, True, True)])
+    mock_uc.assert_has_calls([call(6, 60, 5, 12, True, True, "", "")])
 
     mock_uc.reset_mock()
     rm.update_configuration_from_registration(
@@ -457,9 +506,11 @@ class TestRecoveryManager(TestCase):
         "maxCount" : "5",
         "windowInMinutes" : 20,
         "retryGap" : 2,
-        "maxLifetimeCount" : 5}}
+        "maxLifetimeCount" : 5,
+        "enabledComponents" : " A,B",
+        "disabledComponents" : "C"}}
     )
-    mock_uc.assert_has_calls([call(5, 20, 2, 5, True, True)])
+    mock_uc.assert_has_calls([call(5, 20, 2, 5, True, True, " A,B", "C")])
   pass
 
   @patch.object(RecoveryManager, "_now_")
@@ -471,7 +522,7 @@ class TestRecoveryManager(TestCase):
     rec_st = rm.get_recovery_status()
     self.assertEquals(rec_st, {"summary": "DISABLED"})
 
-    rm.update_config(2, 5, 1, 4, True, True)
+    rm.update_config(2, 5, 1, 4, True, True, "", "")
     rec_st = rm.get_recovery_status()
     self.assertEquals(rec_st, {"summary": "RECOVERABLE", "componentReports": []})
 
@@ -515,7 +566,7 @@ class TestRecoveryManager(TestCase):
       [1000, 1001, 1002, 1003, 1104, 1105, 1106, 1807, 1808, 1809, 1810, 1811, 1812]
 
     rm = RecoveryManager(True)
-    rm.update_config(5, 5, 1, 11, True, False)
+    rm.update_config(5, 5, 1, 11, True, False, "", "")
 
     command1 = copy.deepcopy(self.command)
 
@@ -552,4 +603,30 @@ class TestRecoveryManager(TestCase):
     rm.stop_execution_command()
     self.assertTrue(rm.has_active_command())
     rm.stop_execution_command()
-    self.assertFalse(rm.has_active_command())
\ No newline at end of file
+    self.assertFalse(rm.has_active_command())
+
+  def test_configured_for_recovery(self):
+    rm = RecoveryManager(True)
+    self.assertTrue(rm.configured_for_recovery("A"))
+    self.assertTrue(rm.configured_for_recovery("B"))
+
+    rm.update_config(5, 5, 1, 11, True, False, "", "")
+    self.assertTrue(rm.configured_for_recovery("A"))
+    self.assertTrue(rm.configured_for_recovery("B"))
+
+    rm.update_config(5, 5, 1, 11, True, False, "A", "")
+    self.assertTrue(rm.configured_for_recovery("A"))
+    self.assertFalse(rm.configured_for_recovery("B"))
+
+    rm.update_config(5, 5, 1, 11, True, False, "", "B,C")
+    self.assertTrue(rm.configured_for_recovery("A"))
+    self.assertFalse(rm.configured_for_recovery("B"))
+    self.assertFalse(rm.configured_for_recovery("C"))
+
+    rm.update_config(5, 5, 1, 11, True, False, "A, D, F ", "B,C")
+    self.assertTrue(rm.configured_for_recovery("A"))
+    self.assertFalse(rm.configured_for_recovery("B"))
+    self.assertFalse(rm.configured_for_recovery("C"))
+    self.assertTrue(rm.configured_for_recovery("D"))
+    self.assertFalse(rm.configured_for_recovery("E"))
+    self.assertTrue(rm.configured_for_recovery("F"))

http://git-wip-us.apache.org/repos/asf/ambari/blob/4ba6aceb/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java b/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
index 29364e9..01b6e2c 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/agent/HeartBeatHandler.java
@@ -965,7 +965,7 @@ public class HeartBeatHandler {
     }
     response.setRecoveryConfig(RecoveryConfig.getRecoveryConfig(config));
     if(response.getRecoveryConfig() != null) {
-      LOG.debug("Recovery configuration set to " + response.getRecoveryConfig().toString());
+      LOG.info("Recovery configuration set to " + response.getRecoveryConfig().toString());
     }
 
     Long requestId = 0L;

http://git-wip-us.apache.org/repos/asf/ambari/blob/4ba6aceb/ambari-server/src/main/java/org/apache/ambari/server/agent/RecoveryConfig.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/agent/RecoveryConfig.java b/ambari-server/src/main/java/org/apache/ambari/server/agent/RecoveryConfig.java
index 9ebfb49..3f558eb 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/agent/RecoveryConfig.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/agent/RecoveryConfig.java
@@ -48,6 +48,28 @@ public class RecoveryConfig {
   @SerializedName("maxLifetimeCount")
   private String maxLifetimeCount;
 
+  @SerializedName("enabledComponents")
+  private String enabledComponents;
+
+  @SerializedName("disabledComponents")
+  private String disabledComponents;
+
+
+  public String getDisabledComponents() {
+    return disabledComponents;
+  }
+
+  public void setDisabledComponents(String disabledComponents) {
+    this.disabledComponents = disabledComponents;
+  }
+
+  public String getEnabledComponents() {
+    return enabledComponents;
+  }
+
+  public void setEnabledComponents(String enabledComponents) {
+    this.enabledComponents = enabledComponents;
+  }
 
   public String getType() {
     return type;
@@ -96,6 +118,8 @@ public class RecoveryConfig {
     rc.setRetryGap(conf.getNodeRecoveryRetryGap());
     rc.setType(conf.getNodeRecoveryType());
     rc.setWindowInMinutes(conf.getNodeRecoveryWindowInMin());
+    rc.setDisabledComponents(conf.getDisabledComponents());
+    rc.setEnabledComponents(conf.getEnabledComponents());
     return rc;
   }
 
@@ -107,6 +131,8 @@ public class RecoveryConfig {
     buffer.append(", windowInMinutes=").append(windowInMinutes);
     buffer.append(", retryGap=").append(retryGap);
     buffer.append(", maxLifetimeCount=").append(maxLifetimeCount);
+    buffer.append(", disabledComponents=").append(disabledComponents);
+    buffer.append(", enabledComponents=").append(enabledComponents);
     buffer.append('}');
     return buffer.toString();
   }

http://git-wip-us.apache.org/repos/asf/ambari/blob/4ba6aceb/ambari-server/src/main/java/org/apache/ambari/server/configuration/Configuration.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/configuration/Configuration.java b/ambari-server/src/main/java/org/apache/ambari/server/configuration/Configuration.java
index 23f9803fe..702e12d 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/configuration/Configuration.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/configuration/Configuration.java
@@ -300,6 +300,8 @@ public class Configuration {
   public static final String RECOVERY_WINDOW_IN_MIN_DEFAULT = "60";
   public static final String RECOVERY_RETRY_GAP_KEY = "recovery.retry_interval";
   public static final String RECOVERY_RETRY_GAP_DEFAULT = "5";
+  public static final String RECOVERY_DISABLED_COMPONENTS_KEY = "recovery.disabled_components";
+  public static final String RECOVERY_ENABLED_COMPONENTS_KEY = "recovery.enabled_components";
 
   /**
    * Allow proxy calls to these hosts and ports only
@@ -1672,17 +1674,17 @@ public class Configuration {
 
   public Integer getRequestReadTimeout() {
     return Integer.parseInt(properties.getProperty(REQUEST_READ_TIMEOUT,
-      REQUEST_READ_TIMEOUT_DEFAULT));
+                                                   REQUEST_READ_TIMEOUT_DEFAULT));
   }
 
   public Integer getRequestConnectTimeout() {
     return Integer.parseInt(properties.getProperty(REQUEST_CONNECT_TIMEOUT,
-        REQUEST_CONNECT_TIMEOUT_DEFAULT));
+                                                   REQUEST_CONNECT_TIMEOUT_DEFAULT));
   }
 
   public String getExecutionSchedulerConnections() {
     return properties.getProperty(EXECUTION_SCHEDULER_CONNECTIONS,
-      DEFAULT_SCHEDULER_MAX_CONNECTIONS);
+                                  DEFAULT_SCHEDULER_MAX_CONNECTIONS);
   }
 
   public Long getExecutionSchedulerMisfireToleration() {
@@ -1708,7 +1710,7 @@ public class Configuration {
 
   public String getCustomActionDefinitionPath() {
     return properties.getProperty(CUSTOM_ACTION_DEFINITION_KEY,
-      CUSTOM_ACTION_DEFINITION_DEF_VALUE);
+                                  CUSTOM_ACTION_DEFINITION_DEF_VALUE);
   }
 
   public int getAgentPackageParallelCommandsLimit() {
@@ -1757,7 +1759,7 @@ public class Configuration {
    */
   public int getClientThreadPoolSize() {
     return Integer.parseInt(properties.getProperty(
-      CLIENT_THREADPOOL_SIZE_KEY, String.valueOf(CLIENT_THREADPOOL_SIZE_DEFAULT)));
+        CLIENT_THREADPOOL_SIZE_KEY, String.valueOf(CLIENT_THREADPOOL_SIZE_DEFAULT)));
   }
 
   /**
@@ -1795,7 +1797,7 @@ public class Configuration {
    */
   public long getViewExtractionThreadPoolTimeout() {
     return Long.parseLong(properties.getProperty(
-      VIEW_EXTRACTION_THREADPOOL_TIMEOUT_KEY, String.valueOf(VIEW_EXTRACTION_THREADPOOL_TIMEOUT_DEFAULT)));
+        VIEW_EXTRACTION_THREADPOOL_TIMEOUT_KEY, String.valueOf(VIEW_EXTRACTION_THREADPOOL_TIMEOUT_DEFAULT)));
   }
 
   /**
@@ -1865,6 +1867,22 @@ public class Configuration {
   }
 
   /**
+   * Get the components for which recovery is disabled
+   * @return
+   */
+  public String getDisabledComponents() {
+    return properties.getProperty(RECOVERY_DISABLED_COMPONENTS_KEY, "");
+  }
+
+  /**
+   * Get the components for which recovery is enabled
+   * @return
+   */
+  public String getEnabledComponents() {
+    return properties.getProperty(RECOVERY_ENABLED_COMPONENTS_KEY, "");
+  }
+
+  /**
    * Get the configured retry gap between tries per host component
    * @return
    */


Mime
View raw message