Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 007ED200AE2 for ; Fri, 27 May 2016 18:17:38 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id F385B160A37; Fri, 27 May 2016 16:17:37 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 27C00160A10 for ; Fri, 27 May 2016 18:17:37 +0200 (CEST) Received: (qmail 40913 invoked by uid 500); 27 May 2016 16:17:36 -0000 Mailing-List: contact commits-help@ambari.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: ambari-dev@ambari.apache.org Delivered-To: mailing list commits@ambari.apache.org Received: (qmail 40897 invoked by uid 99); 27 May 2016 16:17:36 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 27 May 2016 16:17:36 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 0EF44DFC74; Fri, 27 May 2016 16:17:36 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: aonishuk@apache.org To: commits@ambari.apache.org Date: Fri, 27 May 2016 16:17:36 -0000 Message-Id: X-Mailer: ASF-Git Admin Mailer Subject: [1/2] ambari git commit: AMBARI-16934. Make it possible to debug ambari-agent in runtime to investigate memory leaks etc. (aonishuk) archived-at: Fri, 27 May 2016 16:17:38 -0000 Repository: ambari Updated Branches: refs/heads/branch-2.4 18c531fe1 -> 43200145d refs/heads/trunk 33c8feede -> e40b8825f AMBARI-16934. Make it possible to debug ambari-agent in runtime to investigate memory leaks etc. (aonishuk) Project: http://git-wip-us.apache.org/repos/asf/ambari/repo Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/e40b8825 Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/e40b8825 Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/e40b8825 Branch: refs/heads/trunk Commit: e40b8825ff1549891fdad99d035aa48f953f2f9d Parents: 33c8fee Author: Andrew Onishuk Authored: Fri May 27 19:17:35 2016 +0300 Committer: Andrew Onishuk Committed: Fri May 27 19:17:35 2016 +0300 ---------------------------------------------------------------------- .../python/ambari_agent/HeartbeatHandlers.py | 7 +- .../python/ambari_agent/RemoteDebugUtils.py | 134 +++++++++++++++++++ .../src/main/python/ambari_agent/debug.py | 51 +++++++ 3 files changed, 189 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/ambari/blob/e40b8825/ambari-agent/src/main/python/ambari_agent/HeartbeatHandlers.py ---------------------------------------------------------------------- diff --git a/ambari-agent/src/main/python/ambari_agent/HeartbeatHandlers.py b/ambari-agent/src/main/python/ambari_agent/HeartbeatHandlers.py index 67e3c77..4a3d372 100644 --- a/ambari-agent/src/main/python/ambari_agent/HeartbeatHandlers.py +++ b/ambari-agent/src/main/python/ambari_agent/HeartbeatHandlers.py @@ -26,6 +26,7 @@ import signal import threading import traceback from ambari_commons.os_family_impl import OsFamilyImpl +from RemoteDebugUtils import remote_debug import sys logger = logging.getLogger() @@ -84,13 +85,12 @@ def signal_handler(signum, frame): def debug(sig, frame): - """Interrupt running process, and provide a python prompt for - interactive debugging.""" + """Interrupt running process, and provide a stacktrace of threads """ d = {'_frame': frame} # Allow access to frame object. d.update(frame.f_globals) # Unless shadowed by global d.update(frame.f_locals) - message = "Signal received : entering python shell.\nTraceback:\n" + message = "Signal received.\nTraceback:\n" message += ''.join(traceback.format_stack(frame)) logger.info(message) @@ -128,6 +128,7 @@ def bind_signal_handlers(agentPid): if os.getpid() == agentPid: signal.signal(signal.SIGINT, signal_handler) signal.signal(signal.SIGTERM, signal_handler) + signal.signal(signal.SIGUSR2, remote_debug) # Interrupt running process, and provide a python prompt for it try: import faulthandler # This is not default module, has to be installed separately faulthandler.enable(file=sys.stderr, all_threads=True) http://git-wip-us.apache.org/repos/asf/ambari/blob/e40b8825/ambari-agent/src/main/python/ambari_agent/RemoteDebugUtils.py ---------------------------------------------------------------------- diff --git a/ambari-agent/src/main/python/ambari_agent/RemoteDebugUtils.py b/ambari-agent/src/main/python/ambari_agent/RemoteDebugUtils.py new file mode 100644 index 0000000..f2a462b --- /dev/null +++ b/ambari-agent/src/main/python/ambari_agent/RemoteDebugUtils.py @@ -0,0 +1,134 @@ +#!/usr/bin/env python + +''' +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +''' + +try: import readline # For readline input support +except: pass + +import sys, os, traceback, codeop, cStringIO, cPickle, tempfile + +def pipename(pid): + """Return name of pipe to use""" + return os.path.join(tempfile.gettempdir(), 'debug-%d' % pid) + +class NamedPipe(object): + def __init__(self, name, end=0, mode=0666): + """Open a pair of pipes, name.in and name.out for communication + with another process. One process should pass 1 for end, and the + other 0. Data is marshalled with pickle.""" + self.in_name, self.out_name = name +'.in', name +'.out', + try: os.mkfifo(self.in_name,mode) + except OSError: pass + try: os.mkfifo(self.out_name,mode) + except OSError: pass + + # NOTE: The order the ends are opened in is important - both ends + # of pipe 1 must be opened before the second pipe can be opened. + if end: + self.inp = open(self.out_name,'r') + self.out = open(self.in_name,'w') + else: + self.out = open(self.out_name,'w') + self.inp = open(self.in_name,'r') + self._open = True + + def is_open(self): + return not (self.inp.closed or self.out.closed) + + def put(self,msg): + if self.is_open(): + data = cPickle.dumps(msg,1) + self.out.write("%d\n" % len(data)) + self.out.write(data) + self.out.flush() + else: + raise Exception("Pipe closed") + + def get(self): + txt=self.inp.readline() + if not txt: + self.inp.close() + else: + l = int(txt) + data=self.inp.read(l) + if len(data) < l: self.inp.close() + return cPickle.loads(data) # Convert back to python object. + + def close(self): + self.inp.close() + self.out.close() + try: os.remove(self.in_name) + except OSError: pass + try: os.remove(self.out_name) + except OSError: pass + + def __del__(self): + self.close() + +def remote_debug(sig,frame): + """Handler to allow process to be remotely debugged.""" + def _raiseEx(ex): + """Raise specified exception in the remote process""" + _raiseEx.ex = ex + _raiseEx.ex = None + + try: + # Provide some useful functions. + locs = {'_raiseEx' : _raiseEx} + locs.update(frame.f_locals) # Unless shadowed. + globs = frame.f_globals + + pid = os.getpid() # Use pipe name based on pid + pipe = NamedPipe(pipename(pid)) + + old_stdout, old_stderr = sys.stdout, sys.stderr + txt = '' + pipe.put("Interrupting process at following point:\n" + + ''.join(traceback.format_stack(frame)) + ">>> ") + + try: + while pipe.is_open() and _raiseEx.ex is None: + line = pipe.get() + if line is None: continue # EOF + txt += line + try: + code = codeop.compile_command(txt) + if code: + sys.stdout = cStringIO.StringIO() + sys.stderr = sys.stdout + exec code in globs,locs + txt = '' + pipe.put(sys.stdout.getvalue() + '>>> ') + else: + pipe.put('... ') + except: + txt='' # May be syntax err. + sys.stdout = cStringIO.StringIO() + sys.stderr = sys.stdout + traceback.print_exc() + pipe.put(sys.stdout.getvalue() + '>>> ') + finally: + sys.stdout = old_stdout # Restore redirected output. + sys.stderr = old_stderr + pipe.close() + + except Exception: # Don't allow debug exceptions to propogate to real program. + traceback.print_exc() + + if _raiseEx.ex is not None: raise _raiseEx.ex \ No newline at end of file http://git-wip-us.apache.org/repos/asf/ambari/blob/e40b8825/ambari-agent/src/main/python/ambari_agent/debug.py ---------------------------------------------------------------------- diff --git a/ambari-agent/src/main/python/ambari_agent/debug.py b/ambari-agent/src/main/python/ambari_agent/debug.py new file mode 100644 index 0000000..f8212de --- /dev/null +++ b/ambari-agent/src/main/python/ambari_agent/debug.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python + +''' +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +''' + +""" +Run this file to interrupt a running python process and open an interactive shell. +""" + +import os +import signal +from RemoteDebugUtils import NamedPipe +from RemoteDebugUtils import pipename + +def debug_process(pid): + """Interrupt a running process and debug it.""" + os.kill(pid, signal.SIGUSR2) # Signal process. + pipe = NamedPipe(pipename(pid), 1) + try: + while pipe.is_open(): + txt=raw_input(pipe.get()) + '\n' + pipe.put(txt) + except EOFError: + pass # Exit. + pipe.close() + +def main(): + with open("/var/run/ambari-agent/ambari-agent.pid") as f: + pid_str = f.read().strip() + pid = int(pid_str) + + debug_process(pid) + +if __name__=='__main__': + main() + \ No newline at end of file