ambari-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From aonis...@apache.org
Subject ambari git commit: AMBARI-15558. ambari-agent upstart script broken in RHEL6 (aonishuk)
Date Thu, 24 Mar 2016 15:14:53 GMT
Repository: ambari
Updated Branches:
  refs/heads/trunk c4791973d -> 22c3dcd5b


AMBARI-15558. ambari-agent upstart script broken in RHEL6 (aonishuk)


Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/22c3dcd5
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/22c3dcd5
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/22c3dcd5

Branch: refs/heads/trunk
Commit: 22c3dcd5b366c0390d9d50d4273e28b4e3f2daa9
Parents: c479197
Author: Andrew Onishuk <aonishuk@hortonworks.com>
Authored: Thu Mar 24 17:14:29 2016 +0200
Committer: Andrew Onishuk <aonishuk@hortonworks.com>
Committed: Thu Mar 24 17:14:29 2016 +0200

----------------------------------------------------------------------
 ambari-agent/etc/init/ambari-agent.conf         |  1 -
 .../src/main/python/ambari_agent/ExitHelper.py  |  1 +
 .../python/ambari_agent/HeartbeatHandlers.py    | 27 ++++----------
 .../src/main/python/ambari_agent/main.py        | 38 ++++++++++++--------
 .../test/python/ambari_agent/TestController.py  |  5 ---
 .../src/test/python/ambari_agent/TestMain.py    |  6 ++--
 6 files changed, 35 insertions(+), 43 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/ambari/blob/22c3dcd5/ambari-agent/etc/init/ambari-agent.conf
----------------------------------------------------------------------
diff --git a/ambari-agent/etc/init/ambari-agent.conf b/ambari-agent/etc/init/ambari-agent.conf
index 75c1b06..b3f2987 100644
--- a/ambari-agent/etc/init/ambari-agent.conf
+++ b/ambari-agent/etc/init/ambari-agent.conf
@@ -17,7 +17,6 @@ description     "ambari agent"
 
 stop on runlevel [06]
 
-kill signal SIGKILL
 respawn
 
 script

http://git-wip-us.apache.org/repos/asf/ambari/blob/22c3dcd5/ambari-agent/src/main/python/ambari_agent/ExitHelper.py
----------------------------------------------------------------------
diff --git a/ambari-agent/src/main/python/ambari_agent/ExitHelper.py b/ambari-agent/src/main/python/ambari_agent/ExitHelper.py
index 06dfadb..e51646f 100644
--- a/ambari-agent/src/main/python/ambari_agent/ExitHelper.py
+++ b/ambari-agent/src/main/python/ambari_agent/ExitHelper.py
@@ -64,6 +64,7 @@ class ExitHelper(object):
 
   def exit(self, code):
     self.execute_cleanup()
+    logger.info("Cleanup finished, exiting with code:" + str(code))
     os._exit(code)
 
 

http://git-wip-us.apache.org/repos/asf/ambari/blob/22c3dcd5/ambari-agent/src/main/python/ambari_agent/HeartbeatHandlers.py
----------------------------------------------------------------------
diff --git a/ambari-agent/src/main/python/ambari_agent/HeartbeatHandlers.py b/ambari-agent/src/main/python/ambari_agent/HeartbeatHandlers.py
index e0d90ac..7a9797d 100644
--- a/ambari-agent/src/main/python/ambari_agent/HeartbeatHandlers.py
+++ b/ambari-agent/src/main/python/ambari_agent/HeartbeatHandlers.py
@@ -96,18 +96,9 @@ def debug(sig, frame):
 
 @OsFamilyImpl(os_family=OsFamilyImpl.DEFAULT)
 class HeartbeatStopHandlersLinux(HeartbeatStopHandlers):
-  def __init__(self, stopEvent=None):
-    # Event is used for synchronizing heartbeat iterations (to make possible
-    # manual wait() interruption between heartbeats )
+  def __init__(self):
     self.heartbeat_wait_event = threading.Event()
-
-    # Event is used to stop the Agent process
-    if stopEvent is None:
-      # Allow standalone testing
-      self.stop_event = threading.Event()
-    else:
-      # Allow one unique event per process
-      self.stop_event = stopEvent
+    self._stop = False
 
   def set_heartbeat(self):
     self.heartbeat_wait_event.set()
@@ -116,19 +107,15 @@ class HeartbeatStopHandlersLinux(HeartbeatStopHandlers):
     self.heartbeat_wait_event.clear()
 
   def set_stop(self):
-    self.stop_event.set()
+    self._stop = True
 
   def wait(self, timeout1, timeout2=0):
-    if self.heartbeat_wait_event.wait(timeout=timeout1):
-      # Event signaled, exit
-      return 1
-    # Stop loop when stop event received
-    # Otherwise sleep a bit more to allow STATUS_COMMAND results to be collected
-    # and sent in one heartbeat. Also avoid server overload with heartbeats
-    if self.stop_event.wait(timeout=timeout2):
+    if self._stop:
       logger.info("Stop event received")
       return 0
-    # Timeout
+
+    if self.heartbeat_wait_event.wait(timeout=timeout1):
+      return 1
     return -1
 
 

http://git-wip-us.apache.org/repos/asf/ambari/blob/22c3dcd5/ambari-agent/src/main/python/ambari_agent/main.py
----------------------------------------------------------------------
diff --git a/ambari-agent/src/main/python/ambari_agent/main.py b/ambari-agent/src/main/python/ambari_agent/main.py
index 2b50cbb..8146859 100644
--- a/ambari-agent/src/main/python/ambari_agent/main.py
+++ b/ambari-agent/src/main/python/ambari_agent/main.py
@@ -69,6 +69,9 @@ def setup_logging(logger, filename, logging_level):
   logger.setLevel(logging_level)
   logger.info("loglevel=logging.{0}".format(logging._levelNames[logging_level]))
 
+GRACEFUL_STOP_TRIES = 10
+GRACEFUL_STOP_TRIES_SLEEP = 3
+
 
 def add_syslog_handler(logger):
     
@@ -161,22 +164,26 @@ def daemonize():
   pid = str(os.getpid())
   file(ProcessHelper.pidfile, 'w').write(pid)
 
-
 def stop_agent():
 # stop existing Ambari agent
   pid = -1
   runner = shellRunner()
   try:
-    f = open(ProcessHelper.pidfile, 'r')
-    pid = f.read()
+    with open(ProcessHelper.pidfile, 'r') as f:
+      pid = f.read()
     pid = int(pid)
-    f.close()
+    
     runner.run([AMBARI_SUDO_BINARY, 'kill', '-15', str(pid)])
-    time.sleep(5)
-    if os.path.exists(ProcessHelper.pidfile):
-      raise Exception("PID file still exists.")
-    sys.exit(0)
+    for i in range(GRACEFUL_STOP_TRIES):
+      result = runner.run([AMBARI_SUDO_BINARY, 'kill', '-0', str(pid)])
+      if result['exitCode'] != 0:
+        logger.info("Agent died gracefully, exiting.")
+        sys.exit(0)
+      time.sleep(GRACEFUL_STOP_TRIES_SLEEP)
+    logger.info("Agent not going to die gracefully, going to execute kill -9")
+    raise Exception("Agent is running")
   except Exception, err:
+    #raise
     if pid == -1:
       print ("Agent process is not running")
     else:
@@ -306,7 +313,8 @@ def main(heartbeat_stop_callback=None):
         # Launch Controller communication
         controller = Controller(config, server_hostname, heartbeat_stop_callback)
         controller.start()
-        controller.join()
+        while controller.is_alive():
+          time.sleep(0.1)
 
       #
       # If Ambari Agent connected to the server or
@@ -314,9 +322,7 @@ def main(heartbeat_stop_callback=None):
       # Clean up if not Windows OS
       #
       if connected or stopped:
-        if not OSCheck.get_os_family() == OSConst.WINSRV_FAMILY:
-          ExitHelper().execute_cleanup()
-          stop_agent()
+        ExitHelper().exit(0)
         logger.info("finished")
         break
     pass # for server_hostname in server_hostnames
@@ -330,7 +336,9 @@ if __name__ == "__main__":
     heartbeat_stop_callback = bind_signal_handlers(agentPid)
   
     main(heartbeat_stop_callback)
-  except:
+  except SystemExit as e:
+    raise e
+  except BaseException as e:
     if is_logger_setup:
-      logger.exception("Fatal exception occurred:")
-    raise
+      logger.exception("Exiting with exception:" + e)
+  raise

http://git-wip-us.apache.org/repos/asf/ambari/blob/22c3dcd5/ambari-agent/src/test/python/ambari_agent/TestController.py
----------------------------------------------------------------------
diff --git a/ambari-agent/src/test/python/ambari_agent/TestController.py b/ambari-agent/src/test/python/ambari_agent/TestController.py
index 05448da..5604769 100644
--- a/ambari-agent/src/test/python/ambari_agent/TestController.py
+++ b/ambari-agent/src/test/python/ambari_agent/TestController.py
@@ -548,8 +548,6 @@ class TestController(unittest.TestCase):
     response["restartAgent"] = "false"
     self.controller.heartbeatWithServer()
 
-    event_mock.assert_any_call(timeout=
-      self.controller.netutil.MINIMUM_INTERVAL_BETWEEN_HEARTBEATS)
 
     # Check that server continues to heartbeat after connection errors
     self.controller.responseId = 1
@@ -569,9 +567,6 @@ class TestController(unittest.TestCase):
     self.controller.heartbeatWithServer()
     self.assertTrue(sendRequest.call_count > 5)
 
-    event_mock.assert_called_with(timeout=
-      self.controller.netutil.MINIMUM_INTERVAL_BETWEEN_HEARTBEATS)
-
     sys.stdout = sys.__stdout__
     self.controller.sendRequest = Controller.Controller.sendRequest
     self.controller.sendRequest = Controller.Controller.addToQueue

http://git-wip-us.apache.org/repos/asf/ambari/blob/22c3dcd5/ambari-agent/src/test/python/ambari_agent/TestMain.py
----------------------------------------------------------------------
diff --git a/ambari-agent/src/test/python/ambari_agent/TestMain.py b/ambari-agent/src/test/python/ambari_agent/TestMain.py
index cffe6c0..477520e 100644
--- a/ambari-agent/src/test/python/ambari_agent/TestMain.py
+++ b/ambari-agent/src/test/python/ambari_agent/TestMain.py
@@ -44,6 +44,7 @@ with patch.object(OSCheck, "os_distribution", new = MagicMock(return_value = os_
   from ambari_commons.os_check import OSConst, OSCheck
   from ambari_agent.ExitHelper import ExitHelper
 
+
 class TestMain(unittest.TestCase):
 
   def setUp(self):
@@ -304,8 +305,8 @@ class TestMain(unittest.TestCase):
   @patch.object(main, "update_log_level")
   @patch.object(NetUtil.NetUtil, "try_to_connect")
   @patch.object(Controller, "__init__")
+  @patch.object(Controller, "is_alive")
   @patch.object(Controller, "start")
-  @patch.object(Controller, "join")
   @patch("optparse.OptionParser.parse_args")
   @patch.object(DataCleaner,"start")
   @patch.object(DataCleaner,"__init__")
@@ -313,13 +314,14 @@ class TestMain(unittest.TestCase):
   @patch.object(PingPortListener,"__init__")
   @patch.object(ExitHelper,"execute_cleanup")
   def test_main(self, cleanup_mock, ping_port_init_mock, ping_port_start_mock, data_clean_init_mock,data_clean_start_mock,
-                parse_args_mock, join_mock, start_mock, Controller_init_mock, try_to_connect_mock,
+                parse_args_mock, start_mock, Controller_is_alive_mock, Controller_init_mock, try_to_connect_mock,
                 update_log_level_mock, daemonize_mock, perform_prestart_checks_mock,
                 ambari_config_mock,
                 stop_mock, bind_signal_handlers_mock,
                 setup_logging_mock, socket_mock):
     data_clean_init_mock.return_value = None
     Controller_init_mock.return_value = None
+    Controller_is_alive_mock.return_value = False
     ping_port_init_mock.return_value = None
     options = MagicMock()
     parse_args_mock.return_value = (options, MagicMock)


Mime
View raw message