ambari-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From nc...@apache.org
Subject [03/16] ambari git commit: AMBARI-18704. Add code to improve debugging of ambari-agent related problems. (aonishuk)
Date Thu, 27 Oct 2016 15:20:31 GMT
AMBARI-18704. Add code to improve debugging of ambari-agent related problems. (aonishuk)


Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/aa588ca8
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/aa588ca8
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/aa588ca8

Branch: refs/heads/branch-feature-AMBARI-18634
Commit: aa588ca8667f5f67b6be2251a6dad37230172fb4
Parents: 7b30be6
Author: Andrew Onishuk <aonishuk@hortonworks.com>
Authored: Wed Oct 26 20:08:46 2016 +0300
Committer: Andrew Onishuk <aonishuk@hortonworks.com>
Committed: Wed Oct 26 20:08:46 2016 +0300

----------------------------------------------------------------------
 .../python/ambari_agent/HeartbeatHandlers.py    | 13 ++++--------
 .../python/ambari_agent/RemoteDebugUtils.py     | 21 +++++++++++++++++++-
 .../ambari_agent/StatusCommandsExecutor.py      | 12 ++++++++---
 3 files changed, 33 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/ambari/blob/aa588ca8/ambari-agent/src/main/python/ambari_agent/HeartbeatHandlers.py
----------------------------------------------------------------------
diff --git a/ambari-agent/src/main/python/ambari_agent/HeartbeatHandlers.py b/ambari-agent/src/main/python/ambari_agent/HeartbeatHandlers.py
index 4a3d372..836ab07 100644
--- a/ambari-agent/src/main/python/ambari_agent/HeartbeatHandlers.py
+++ b/ambari-agent/src/main/python/ambari_agent/HeartbeatHandlers.py
@@ -26,9 +26,10 @@ import signal
 import threading
 import traceback
 from ambari_commons.os_family_impl import OsFamilyImpl
-from RemoteDebugUtils import remote_debug
 import sys
 
+from ambari_agent.RemoteDebugUtils import bind_debug_signal_handlers
+
 logger = logging.getLogger()
 
 _handler = None
@@ -128,14 +129,8 @@ def bind_signal_handlers(agentPid):
     if os.getpid() == agentPid:
       signal.signal(signal.SIGINT, signal_handler)
       signal.signal(signal.SIGTERM, signal_handler)
-      signal.signal(signal.SIGUSR2, remote_debug) # Interrupt running process, and provide a python prompt for it
-      try:
-        import faulthandler  # This is not default module, has to be installed separately
-        faulthandler.enable(file=sys.stderr, all_threads=True)
-        faulthandler.register(signal.SIGUSR1, file=sys.stderr, all_threads=True, chain=False)
-        sys.stderr.write("Registered faulthandler\n")
-      except ImportError:
-        pass  # Module is not included into python distribution
+
+      bind_debug_signal_handlers()
 
     _handler = HeartbeatStopHandlersLinux()
   else:

http://git-wip-us.apache.org/repos/asf/ambari/blob/aa588ca8/ambari-agent/src/main/python/ambari_agent/RemoteDebugUtils.py
----------------------------------------------------------------------
diff --git a/ambari-agent/src/main/python/ambari_agent/RemoteDebugUtils.py b/ambari-agent/src/main/python/ambari_agent/RemoteDebugUtils.py
index f2a462b..ae997ac 100644
--- a/ambari-agent/src/main/python/ambari_agent/RemoteDebugUtils.py
+++ b/ambari-agent/src/main/python/ambari_agent/RemoteDebugUtils.py
@@ -21,7 +21,26 @@ limitations under the License.
 try: import readline  # For readline input support
 except: pass
 
-import sys, os, traceback, codeop, cStringIO, cPickle, tempfile
+import sys, signal, os, traceback, codeop, cStringIO, cPickle, tempfile
+
+def bind_debug_signal_handlers():
+  signal.signal(signal.SIGUSR1, print_threads_stack_traces) # prints process threads current stack trace to the err stream. (can be found in ambari-agent.out)
+  signal.signal(signal.SIGUSR2, remote_debug) # provide a read-only python shell, which represent the process state at time of signal arrival.
+
+def print_threads_stack_traces(sig, frame):
+  print >> sys.stderr, "\n*** STACKTRACE - START ***\n"
+  code = []
+  for threadId, stack in sys._current_frames().items():
+    code.append("\n# ThreadID: %s" % threadId)
+    for filename, lineno, name, line in traceback.extract_stack(stack):
+      code.append('File: "%s", line %d, in %s' % (filename,
+                                                  lineno, name))
+      if line:
+        code.append("  %s" % (line.strip()))
+
+  for line in code:
+    print >> sys.stderr, line
+  print >> sys.stderr, "\n*** STACKTRACE - END ***\n"
 
 def pipename(pid):
   """Return name of pipe to use"""

http://git-wip-us.apache.org/repos/asf/ambari/blob/aa588ca8/ambari-agent/src/main/python/ambari_agent/StatusCommandsExecutor.py
----------------------------------------------------------------------
diff --git a/ambari-agent/src/main/python/ambari_agent/StatusCommandsExecutor.py b/ambari-agent/src/main/python/ambari_agent/StatusCommandsExecutor.py
index 8959640..20acee4 100644
--- a/ambari-agent/src/main/python/ambari_agent/StatusCommandsExecutor.py
+++ b/ambari-agent/src/main/python/ambari_agent/StatusCommandsExecutor.py
@@ -22,7 +22,8 @@ import signal
 import threading
 import logging
 import multiprocessing
-from PythonReflectiveExecutor import PythonReflectiveExecutor
+from ambari_agent.PythonReflectiveExecutor import PythonReflectiveExecutor
+from ambari_agent.RemoteDebugUtils import bind_debug_signal_handlers
 
 logger = logging.getLogger(__name__)
 
@@ -43,8 +44,10 @@ class StatusCommandsExecutor(multiprocessing.Process):
 
   def run(self):
     try:
+      bind_debug_signal_handlers()
       while True:
         command = self.actionQueue.statusCommandQueue.get(True) # blocks until status status command appears
+        logger.info("Running status command for {0}".format(command['componentName'])) # TODO: change to logger.debug once fixed
         
         timeout_timer = threading.Timer( self.status_command_timeout, self.respawn, [command])
         timeout_timer.start()
@@ -52,6 +55,7 @@ class StatusCommandsExecutor(multiprocessing.Process):
         self.process_status_command(command)
 
         timeout_timer.cancel()
+        logger.info("Completed status command for {0}".format(command['componentName'])) # TODO: change to logger.debug once fixed
     except:
       logger.exception("StatusCommandsExecutor process failed with exception:")
       raise
@@ -67,8 +71,10 @@ class StatusCommandsExecutor(multiprocessing.Process):
 
   def respawn(self, command):
     try:
-      # Force context to reset to normal. By context we mean sys.path, imports, etc. They are set by specific status command, and are not relevant to ambari-agent.
-      PythonReflectiveExecutor.last_context.revert()
+      if hasattr(PythonReflectiveExecutor, "last_context"):
+        # Force context to reset to normal. By context we mean sys.path, imports, etc. They are set by specific status command, and are not relevant to ambari-agent.
+        PythonReflectiveExecutor.last_context.revert()
+
       logger.warn("Command {0} for {1} is running for more than {2} seconds. Terminating it due to timeout.".format(command['commandType'], command['componentName'], self.status_command_timeout))
 
       self.hasTimeoutedEvent.set()


Mime
View raw message