qpid-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From c...@apache.org
Subject qpid-dispatch git commit: DISPATCH-1191: Fixes based on real-world large log files
Date Tue, 27 Nov 2018 16:21:37 GMT
Repository: qpid-dispatch
Updated Branches:
  refs/heads/master 5c3411a1a -> 75c9763a9


DISPATCH-1191: Fixes based on real-world large log files

* Add a large-file splitter to break files into manageable chunks
* Fix Attach parser to accept QpidJMS nested described types and PN_SYMBOL
* Add command line argparse
** Select split or merge program mode
** Disable over-the-top analysis to reduce output on large input files
* Remove file splitter.py to avoid confusion with log_splitter.py


Project: http://git-wip-us.apache.org/repos/asf/qpid-dispatch/repo
Commit: http://git-wip-us.apache.org/repos/asf/qpid-dispatch/commit/75c9763a
Tree: http://git-wip-us.apache.org/repos/asf/qpid-dispatch/tree/75c9763a
Diff: http://git-wip-us.apache.org/repos/asf/qpid-dispatch/diff/75c9763a

Branch: refs/heads/master
Commit: 75c9763a9cf28dacd87bec84bfd7d0e9748f6dca
Parents: 5c3411a
Author: Chuck Rolke <crolke@redhat.com>
Authored: Tue Nov 27 11:14:27 2018 -0500
Committer: Chuck Rolke <crolke@redhat.com>
Committed: Tue Nov 27 11:14:27 2018 -0500

----------------------------------------------------------------------
 bin/log_scraper/common.py               |  13 +-
 bin/log_scraper/log_splitter.py         | 426 +++++++++++++++++++++++++++
 bin/log_scraper/main.py                 |  57 ++--
 bin/log_scraper/parser.py               | 118 +++++++-
 bin/log_scraper/splitter.py             |  94 ------
 bin/log_scraper/test_data/test_data.txt |   2 +
 6 files changed, 581 insertions(+), 129 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/qpid-dispatch/blob/75c9763a/bin/log_scraper/common.py
----------------------------------------------------------------------
diff --git a/bin/log_scraper/common.py b/bin/log_scraper/common.py
index d570024..0a74f3c 100755
--- a/bin/log_scraper/common.py
+++ b/bin/log_scraper/common.py
@@ -40,11 +40,14 @@ else:
 
 class Common():
 
-    # arg - index transfer data or not
-    # If a log file has 100M transfers then adverbl dies.
-    # With program arg --no-data then data indexing is turned off but
-    # the output still shows connections, links, and link state costs.
-    arg_index_data = True
+    # analysis_level_ludicrous
+    # Adverbl tries too hard to cross reference data
+    # Use these switches to turn some of the biggest offenders off
+    per_link_detail = True
+    message_progress_tables = False
+
+    # returned from argparse.parse_args()
+    args = None
 
     # first letter of the connection names
     log_char_base = 'A'

http://git-wip-us.apache.org/repos/asf/qpid-dispatch/blob/75c9763a/bin/log_scraper/log_splitter.py
----------------------------------------------------------------------
diff --git a/bin/log_scraper/log_splitter.py b/bin/log_scraper/log_splitter.py
new file mode 100755
index 0000000..c83e385
--- /dev/null
+++ b/bin/log_scraper/log_splitter.py
@@ -0,0 +1,426 @@
+#!/usr/bin/env python
+
+# Split a gigantic (or not) log file into files of traffic for each connection.
+# Identify probable router and broker connections, QpidJMS client connections,
+# and AMQP errors. Create lists of connections sorted by log line and by transfer counts.
+# Emit a web page summarizing the results.
+
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+from __future__ import print_function
+
+import cgi
+import os
+import sys
+import traceback
+from collections import defaultdict
+
+
+class connection():
+    def __init__(self, instance, conn_id, logfile):
+        self.instance = instance
+        self.conn_id = conn_id
+        self.logfile = logfile
+        self.lines = []
+        self.key_name = connection.keyname(instance, conn_id)
+        self.transfers = 0
+        self.peer_open = ""
+        self.peer_type = ""
+        self.log_n_lines = 0
+        self.log_n_dir = ""
+        self.file_name = ""
+        self.path_name = ""
+
+    @staticmethod
+    def keyname(instance, conn_id):
+        tmp = "0000000" + str(conn_id)
+        return str(instance) + "." + tmp[-8:]
+
+    def disp_name(self):
+        return str(self.instance) + "_" + str(self.conn_id)
+
+    def generate_paths(self):
+        self.log_n_dir = "10e%d" % self.log_n_lines
+        self.file_name = self.disp_name() + ".log"
+        self.path_name = self.log_n_dir + "/" + self.file_name
+
+
+class LogFile:
+    def __init__(self, fn, top_n=24):
+        """
+        Represent connections in a file
+        :param fn: file name
+        :param
+        """
+        self.log_fn = fn    # file name
+        self.top_n = top_n  # how many to report
+        self.instance = 0   # incremented when router restarts in log file
+        self.amqp_lines = 0 # server trace lines
+        self.transfers = 0  # server transfers
+
+        # restarts
+        self.restarts = []
+
+        # connections
+        # dictionary of connection data
+        # key = connection id: <instance>.<conn_id>    "0.3"
+        # val = connection class object
+        self.connections = {}
+
+        # router_connections
+        # list of received opens that suggest a router at the other end
+        self.router_connections = []
+
+        # broker connections
+        # list of received opens that suggest a broker at the other end
+        self.broker_connections = []
+
+        # errors
+        # amqp errors in time order
+        self.errors = []
+
+        # conns_by_size_transfer
+        # all connections in transfer size descending order
+        self.conns_by_size_transfer = []
+
+        # conns_by_size_loglines
+        # all connections in log_lines size descending order
+        self.conns_by_size_loglines = []
+
+        # histogram - count of connections with N logs < 10^index
+        # [0] = N < 10^0
+        # [1] = N < 10^1
+        self.histogram = [0,0,0,0,0,0,0,0,0,0]
+        self.hist_max = len(self.histogram) - 1
+
+    def parse_identify(self, text, line, before_col=70):
+        """
+        Look for text in line but make sure it's not in the body of some message,
+        :param text:
+        :param line:
+        :param before_col: limit on how far to search into line
+        """
+        st = line.find(text, 0, (before_col + len(text)))
+        if st < 0:
+            return False
+        return st < 70
+
+    def parse_line(self, line):
+        """
+        Do minimum parsing on line.
+        If container name then bump instance value
+        If server trace then get conn_id and add line to connections data
+        :param line:
+        :return:
+        """
+        key_sstart = "SERVER (info) Container Name:"  # Normal 'router is starting' restart discovery line
+        key_strace = "SERVER (trace) ["  # AMQP traffic
+        key_error = "@error(29)"
+        key_openin = "<- @open(16)"
+        key_xfer = "@transfer"
+        key_prod_dispatch = ':product="qpid-dispatch-router"'
+        key_prod_aartemis = ':product="apache-activemq-artemis"'
+        key_prod_aqpidcpp = ':product="qpid-cpp"'
+        key_prod_aqpidjms = ':product="QpidJMS"'
+
+        if self.parse_identify(key_sstart, line):
+            self.instance += 1
+            self.restarts.append(line)
+        else:
+            if self.parse_identify(key_strace, line):
+                self.amqp_lines += 1
+                idx = line.find(key_strace)
+                idx += len(key_strace)
+                eidx = line.find("]", idx + 1)
+                conn_id = line[idx:eidx]
+                keyname = connection.keyname(self.instance, conn_id)
+                if keyname not in self.connections:
+                    self.connections[keyname] = connection(self.instance, conn_id, self)
+                curr_conn = self.connections[keyname]
+                curr_conn.lines.append(line)
+                # router hint
+                if key_openin in line:
+                    # inbound open
+                    if key_prod_dispatch in line:
+                        self.router_connections.append(curr_conn)
+                        curr_conn.peer_open = line
+                        curr_conn.peer_type = key_prod_dispatch
+                    elif key_prod_aqpidjms in line:
+                            curr_conn.peer_type = key_prod_aqpidjms
+                    else:
+                        for k in [key_prod_aartemis, key_prod_aqpidcpp]:
+                            if k in line:
+                                self.broker_connections.append(curr_conn)
+                                curr_conn.peer_open = line
+                                curr_conn.peer_type = k
+                elif self.parse_identify(key_xfer, line):
+                    self.transfers += 1
+                    curr_conn.transfers += 1
+        if key_error in line:
+            self.errors.append(line)
+
+    def log_of(self, x):
+        """
+        calculate nearest power of 10 > x
+        :param x:
+        :return:
+        """
+        for i in range(self.hist_max):
+            if x < 10 ** i:
+                return i
+        return self.hist_max
+
+    def sort_sizes(self, sortfunc1, sortfunc2):
+        smap = defaultdict(list)
+        conns_by_size = []
+        # create size map. index is size, list holds all connections of that many transfers
+        for k, v in dict_iteritems(self.connections):
+            smap[str(sortfunc1(v))].append(v)
+        # create a sorted list of sizes in sizemap
+        sl = list(dict_iterkeys(smap))
+        sli = [int(k) for k in sl]
+        slist = sorted(sli, reverse=True)
+        # create grand list of all connections
+        for cursize in slist:
+            lsm = smap[str(cursize)]
+            lsm = sorted(lsm, key = sortfunc2, reverse=True)
+            #lsm = sorted(lsm, key = lambda x: int(x.conn_id))
+            for ls in lsm:
+                conns_by_size.append(ls)
+        return conns_by_size
+
+
+    def summarize_connections(self):
+        # sort connections based on transfer count and on n log lines
+        self.conns_by_size_transfer = self.sort_sizes(lambda x: x.transfers, lambda x: len(x.lines))
+        self.conns_by_size_loglines = self.sort_sizes(lambda x: len(x.lines), lambda x: x.transfers)
+
+        # compute log_n and file name facts for all connections
+        for k, v in dict_iteritems(self.connections):
+            v.log_n_lines = self.log_of(len(v.lines))
+            v.generate_paths()
+
+        # Write the web doc to stdout
+        print ("""<!DOCTYPE html>
+        <html>
+        <head>
+        <title>%s qpid-dispatch log split</title>
+
+        <style>
+            * { 
+            font-family: sans-serif; 
+        }
+        table {
+            border-collapse: collapse;
+        }
+        table, td, th {
+            border: 1px solid black;
+            padding: 3px;
+        }
+        </style>
+""" % self.log_fn)
+
+        print("""
+<h3>Contents</h3>
+<table>
+<tr> <th>Section</th>                                                 
   <th>Description</th> </tr>
+<tr><td><a href=\"#c_summary\"        >Summary</a></td>   
               <td>Summary</td></tr>
+<tr><td><a href=\"#c_restarts\"       >Router restarts</a></td>
          <td>Router reboot records</td></tr>
+<tr><td><a href=\"#c_router_conn\"    >Interrouter connections</a></td>
  <td>Probable interrouter connections</td></tr>
+<tr><td><a href=\"#c_broker_conn\"    >Broker connections</a></td>
       <td>Probable broker connections</td></tr>
+<tr><td><a href=\"#c_errors\"         >AMQP errors</a></td>
              <td>AMQP errors</td></tr>
+<tr><td><a href=\"#c_conn_xfersize\"  >Conn by N transfers</a></td>
      <td>Connections sorted by transfer log count</td></tr>
+<tr><td><a href=\"#c_conn_xfer0\"     >Conn with no transfers</a></td>
   <td>Connections with no transfers</td></tr>
+<tr><td><a href=\"#c_conn_logsize\"   >Conn by N log lines</a></td>
      <td>Connections sorted by total log line count</td></tr>
+</table>
+<hr>
+""")
+        print("<a name=\"c_summary\"></a>")
+        print("<table>")
+        print("<tr><th>Statistic</th>          <th>Value</th></tr>")
+        print("<tr><td>File</td>               <td>%s</td></tr>" % self.log_fn)
+        print("<tr><td>Router starts</td>      <td>%s</td></tr>" % str(self.instance))
+        print("<tr><td>Connections</td>        <td>%s</td></tr>" % str(len(self.connections)))
+        print("<tr><td>Router connections</td> <td>%s</td></tr>" % str(len(self.router_connections)))
+        print("<tr><td>AMQP log lines</td>     <td>%s</td></tr>" % str(self.amqp_lines))
+        print("<tr><td>AMQP errors</td>        <td>%s</td></tr>" % str(len(self.errors)))
+        print("<tr><td>AMQP transfers</td>     <td>%s</td></tr>" % str(self.transfers))
+        print("</table>")
+        print("<hr>")
+
+        # Restarts
+        print("<a name=\"c_restarts\"></a>")
+        print("<h3>Restarts</h3>")
+        for i in range(1, (self.instance + 1)):
+            rr = self.restarts[i-1]
+            print("(%d) - %s<br>" % (i, rr), end='')
+        print("<hr>")
+
+        # interrouter connections
+        print("<a name=\"c_router_conn\"></a>")
+        print("<h3>Probable inter-router connections (N=%d)</h3>" % (len(self.router_connections)))
+        print("<table>")
+        print("<tr><th>Connection</th> <th>Transfers</th> <th>Log lines</th> <th>AMQP Open<th></tr>")
+        for rc in self.router_connections:
+            print("<tr><td><a href=\"%s/%s\">%s</a></td><td>%d</td><td>%d</td><td>%s</td></tr>" %
+                  (rc.logfile.odir(), rc.path_name, rc.disp_name(), rc.transfers, len(rc.lines),
+                   cgi.escape(rc.peer_open)))
+        print("</table>")
+        print("<hr>")
+
+        # broker connections
+        print("<a name=\"c_broker_conn\"></a>")
+        print("<h3>Probable broker connections (N=%d)</h3>" % (len(self.broker_connections)))
+        print("<table>")
+        print("<tr><th>Connection</th> <th>Transfers</th> <th>Log lines</th> <th>AMQP Open<th></tr>")
+        for rc in self.broker_connections:
+            print("<tr><td><a href=\"%s/%s\">%s</a></td><td>%d</td><td>%d</td><td>%s</td></tr>" %
+                  (rc.logfile.odir(), rc.path_name, rc.disp_name(), rc.transfers, len(rc.lines),
+                   cgi.escape(rc.peer_open)))
+        print("</table>")
+        print("<hr>")
+
+        ## histogram
+        #for cursize in self.sizelist:
+        #    self.histogram[self.log_of(cursize)] += len(self.sizemap[str(cursize)])
+        #print()
+        #print("Log lines per connection distribution")
+        #for i in range(1, self.hist_max):
+        #    print("N <  10e%d : %d" %(i, self.histogram[i]))
+        #print("N >= 10e%d : %d" % ((self.hist_max - 1), self.histogram[self.hist_max]))
+
+        # errors
+        print("<a name=\"c_errors\"></a>")
+        print("<h3>AMQP errors (N=%d)</h3>" % (len(self.errors)))
+        print("<table>")
+        print("<tr><th>N</th> <th>AMQP error</th></tr>")
+        for i in range(len(self.errors)):
+            print("<tr><td>%d</td> <td>%s</td></tr>" % (i, cgi.escape(self.errors[i].strip())))
+        print("</table>")
+        print("<hr>")
+
+    def odir(self):
+        return os.path.join(os.getcwd(), (self.log_fn + ".splits"))
+
+    def write_subfiles(self):
+        # Q: Where to put the generated files? A: odir
+        odir = self.odir()
+        odirs = ['dummy'] # dirs indexed by log of n-lines
+
+        os.makedirs(odir)
+        for i in range(1, self.hist_max):
+            nrange = ("10e%d" % (i))
+            ndir = os.path.join(odir, nrange)
+            os.makedirs(ndir)
+            odirs.append(ndir)
+
+        for k, c in dict_iteritems(self.connections):
+            cdir = odirs[self.log_of(len(c.lines))]
+            opath = os.path.join(cdir, (c.disp_name() + ".log"))
+            with open(opath, 'w') as f:
+                for l in c.lines:
+                    f.write(l)
+
+        xfer0 = 0
+        for rc in self.conns_by_size_transfer:
+            if rc.transfers == 0:
+                xfer0 += 1
+        print("<a name=\"c_conn_xfersize\"></a>")
+        print("<h3>Connections by transfer count (N=%d)</h3>" % (len(self.conns_by_size_transfer) - xfer0))
+        print("<table>")
+        n = 1
+        print("<tr><th>N</th><th>Connection</th> <th>Transfers</th> <th>Log lines</th> <th>Type</th> <th>AMQP detail<th></tr>")
+        for rc in self.conns_by_size_transfer:
+            if rc.transfers > 0:
+                print("<tr><td>%d</td><td><a href=\"%s/%s\">%s</a></td> <td>%d</td> <td>%d</td> <td>%s</td> <td>%s</td></tr>" %
+                      (n, rc.logfile.odir(), rc.path_name, rc.disp_name(), rc.transfers, len(rc.lines),
+                       rc.peer_type, cgi.escape(rc.peer_open)))
+                n += 1
+        print("</table>")
+        print("<hr>")
+
+        print("<a name=\"c_conn_xfer0\"></a>")
+        print("<h3>Connections with no AMQP transfers (N=%d)</h3>" % (xfer0))
+        print("<table>")
+        n = 1
+        print("<tr><th>N</th><th>Connection</th> <th>Transfers</th> <th>Log lines</th> <th>Type</th> <th>AMQP detail<th></tr>")
+        for rc in self.conns_by_size_transfer:
+            if rc.transfers == 0:
+                print("<tr><td>%d</td><td><a href=\"%s/%s\">%s</a></td> <td>%d</td> <td>%d</td> <td>%s</td> <td>%s</td></tr>" %
+                      (n, rc.logfile.odir(), rc.path_name, rc.disp_name(), rc.transfers, len(rc.lines),
+                       rc.peer_type, cgi.escape(rc.peer_open)))
+                n += 1
+        print("</table>")
+        print("<hr>")
+
+        print("<a name=\"c_conn_logsize\"></a>")
+        print("<h3>Connections by total log line count (N=%d)</h3>" % (len(self.conns_by_size_loglines)))
+        print("<table>")
+        n = 1
+        print("<tr><th>N</th><th>Connection</th> <th>Transfers</th> <th>Log lines</th> <th>Type</th> <th>AMQP detail<th></tr>")
+        for rc in self.conns_by_size_loglines:
+            print("<tr><td>%d</td><td><a href=\"%s/%s\">%s</a></td> <td>%d</td> <td>%d</td> <td>%s</td> <td>%s</td></tr>" %
+                  (n, rc.logfile.odir(), rc.path_name, rc.disp_name(), rc.transfers, len(rc.lines),
+                   rc.peer_type, cgi.escape(rc.peer_open)))
+            n += 1
+        print("</table>")
+        print("<hr>")
+
+
+# py 2-3 compat
+
+IS_PY2 = sys.version_info[0] == 2
+
+if IS_PY2:
+    def dict_iteritems(d):
+        return d.iteritems()
+    def dict_iterkeys(d):
+        return d.iterkeys()
+else:
+    def dict_iteritems(d):
+        return iter(d.items())
+    def dict_iterkeys(d):
+        return iter(d.keys())
+
+
+#
+#
+def main_except(log_fn):
+    """
+    Given a log file name, split the file into per-connection sub files
+    """
+    log_files = []
+
+    if not os.path.exists(log_fn):
+        sys.exit('ERROR: log file %s was not found!' % log_fn)
+
+    # parse the log file
+    with open(log_fn, 'r') as infile:
+        lf = LogFile(log_fn)
+        odir = lf.odir()
+        if os.path.exists(odir):
+            sys.exit('ERROR: output directory %s exists' % odir)
+        log_files.append(lf)
+        for line in infile:
+            lf.parse_line(line)
+
+    # write output
+    for lf in log_files:
+        lf.summarize_connections() # prints web page to console
+        lf.write_subfiles()        # generates split files one-per-connection
+    pass
+
+def main(argv):
+    try:
+        if len(argv) != 2:
+            sys.exit('Usage: %s log-file-name' % argv[0])
+        main_except(argv[1])
+        return 0
+    except Exception as e:
+        traceback.print_exc()
+        return 1
+
+
+if __name__ == "__main__":
+    sys.exit(main(sys.argv))

http://git-wip-us.apache.org/repos/asf/qpid-dispatch/blob/75c9763a/bin/log_scraper/main.py
----------------------------------------------------------------------
diff --git a/bin/log_scraper/main.py b/bin/log_scraper/main.py
index a0d4b40..a44039f 100755
--- a/bin/log_scraper/main.py
+++ b/bin/log_scraper/main.py
@@ -33,6 +33,7 @@ from __future__ import division
 from __future__ import absolute_import
 from __future__ import print_function
 
+import argparse
 import ast
 import cgi
 import os
@@ -40,6 +41,7 @@ import sys
 import traceback
 
 import common
+from log_splitter import main_except as splitter_main
 import parser
 import router
 import text
@@ -77,23 +79,38 @@ def show_noteworthy_line(plf, comn):
 #
 #
 def main_except(argv):
-    """
-    Given a list of log file names, send the javascript web page to stdout
-    """
-    if len(argv) < 2:
-        sys.exit('Usage: %s [--no-data] log-file-name [log-file-name ...]' % argv[0])
-
     # Instantiate a common block
     comn = common.Common()
 
-    # optparse - look for --no-data switch
-    if argv[1] == "--no-data":
-        comn.arg_index_data = False
-        del argv[1]
+    # optparse - look for data-inhibit and program mode control
+    p = argparse.ArgumentParser()
+    p.add_argument('--skip-all-data',
+                   action='store_true',
+                   help='Max load shedding: do not store/index transfer, disposition, flow or EMPTY_FRAME data')
+    p.add_argument('--skip-detail',
+                   action='store_true',
+                   help='Load shedding: do not produce Connection Details tables')
+    p.add_argument('--skip-msg-progress',
+                   action='store_true',
+                   help='Load shedding: do not produce Message Progress tables')
+    p.add_argument('--split',
+                   action='store_true',
+                   help='A file is split into per-connection data. Normal processing is not performed.')
+    p.add_argument('--files', '-f', nargs="+")
+
+    del argv[0]
+    comn.args = p.parse_args(argv)
+
+    # process split function
+    if comn.args.split:
+        # Split processes only a single file
+        if len(comn.args.files) > 1:
+            sys.exit('--split mode takes only one file name')
+        return splitter_main(comn.args.files[0])
 
     # process the log files and add the results to router_array
-    for log_i in range(0, len(sys.argv) - 1):
-        arg_log_file = sys.argv[log_i + 1]
+    for log_i in range(len(comn.args.files)):
+        arg_log_file = comn.args.files[log_i]
         comn.log_fns.append(arg_log_file)
         comn.n_logs += 1
 
@@ -255,8 +272,8 @@ def main_except(argv):
     print(text.web_page_toc())
 
     # Report how much data was skipped if --no-data switch in effect
-    if not comn.arg_index_data:
-        print("--no-data switch in effect. %d log lines skipped" % comn.data_skipped)
+    if comn.args.skip_all_data:
+        print("--skip-all-data switch is in effect. %d log lines skipped" % comn.data_skipped)
         print("<p><hr>")
 
     # file(s) included in this doc
@@ -398,9 +415,12 @@ def main_except(argv):
     # connection details
     print("<a name=\"c_conndetails\"></a>")
     print("<h3>Connection Details</h3>")
-    for rtrlist in comn.routers:
-        for rtr in rtrlist:
-            rtr.details.show_html()
+    if not comn.args.skip_detail:
+        for rtrlist in comn.routers:
+            for rtr in rtrlist:
+                rtr.details.show_html()
+    else:
+        print ("details suppressed<br>")
     print("<hr>")
 
     # noteworthy log lines: highlight errors and stuff
@@ -522,7 +542,8 @@ def main_except(argv):
     # data traversing network
     print("<a name=\"c_messageprogress\"></a>")
     print("<h3>Message progress</h3>")
-    for i in range(0, comn.shorteners.short_data_names.len()):
+    if not comn.args.skip_msg_progress:
+      for i in range(0, comn.shorteners.short_data_names.len()):
         sname = comn.shorteners.short_data_names.shortname(i)
         size = 0
         for plf in tree:

http://git-wip-us.apache.org/repos/asf/qpid-dispatch/blob/75c9763a/bin/log_scraper/parser.py
----------------------------------------------------------------------
diff --git a/bin/log_scraper/parser.py b/bin/log_scraper/parser.py
index 44c3e91..05e8ae3 100755
--- a/bin/log_scraper/parser.py
+++ b/bin/log_scraper/parser.py
@@ -29,7 +29,6 @@ import re
 import sys
 import traceback
 
-import splitter
 import test_data as td
 import common
 import text
@@ -41,6 +40,55 @@ def colorize_bg(what):
     return what
 
 
+def proton_split(line):
+    """
+    Split a log line into fields.
+     * allow commas and spaces in quoted strings.
+     * split on ', ' and on ' '.
+       strip trailing commas between fields.
+     * quoted fields must have both quotes
+    :param line:
+    :return:
+    """
+    result = []
+    indqs = False
+    pending_comma = False
+    res = ""
+    for i in range(len(line)):
+        c = line[i]
+        if c == '\"':
+            if pending_comma:
+                res += ','
+                pending_comma = False
+            indqs = not indqs
+            res += c
+        elif c == ',':
+            if pending_comma:
+                res += c
+            pending_comma = True
+        elif c == ' ':
+            if indqs:
+                if pending_comma:
+                    res += ','
+                    pending_comma = False
+                res += c
+            else:
+                if res != '':
+                    if pending_comma:
+                        pending_comma = False
+                    result.append(res)
+                    res = ''
+        else:
+            res += c
+    if res != '':
+        result.append(str(res))
+    if indqs:
+        raise ValueError("SPLIT ODD QUOTES: %s", line)
+    # print ("SPLIT: line: %s" % line)
+    # print ("SPLIT: flds: %s" % result)
+    return result
+
+
 class LogLineData:
 
     def direction_is_in(self):
@@ -227,7 +275,7 @@ class DescribedType:
         self.line = self.line[:-1]
 
         # process fields
-        fields = splitter.Splitter.split(self.line)
+        fields = proton_split(self.line)
         while len(fields) > 0 and len(fields[0]) > 0:
             if '=' not in fields[0]:
                 raise ValueError("Field does not contain equal sign '%s'" % fields[0])
@@ -243,23 +291,43 @@ class DescribedType:
                     subfields.append("[]")
                     del fields[0]
                 else:
+                    # While extracting this type's fields, include nested described types
+                    # and PN_SYMBOL data enclosed in brackets. Current type ends when close
+                    # bracket seen and nest level is zero.
+                    nest = 0
                     while len(fields) > 0:
-                        if fields[0].endswith('],'):
-                            subfields.append(fields[0][:-2])
-                            subfields.append(']')
-                            del fields[0]
-                            break
-                        if fields[0].endswith(']'):
-                            subfields.append(fields[0][:-1])
-                            subfields.append(']')
+                        if "=@" in fields[0] and "]" not in fields[0] and "=@:" not in fields[0]:
+                            nest += 1
+                        if nest == 0:
+                            if fields[0].endswith('],'):
+                                subfields.append(fields[0][:-2])
+                                subfields.append(']')
+                                del fields[0]
+                                break
+                            if fields[0].endswith(']'):
+                                subfields.append(fields[0][:-1])
+                                subfields.append(']')
+                                del fields[0]
+                                break
+                        elif fields[0].endswith('],') or fields[0].endswith(']'):
+                            nest -= 1
+                        if fields[0].endswith(']]'):
+                            subfields.append(fields[0])
                             del fields[0]
                             break
                         subfields.append(fields[0])
                         del fields[0]
 
+
                 subtype = DescribedType()
                 subtype.parse_dtype_line(val, ' '.join(subfields))
                 self.dict[key] = subtype
+            elif val.startswith("@PN_SYMBOL"):
+                # symbols may end in first field or some later field
+                while not val.endswith(']'):
+                    val += fields[0]
+                    del fields[0]
+                self.dict[key] = val
             elif val.startswith('{'):
                 # handle some embedded map: properties={:product=\"qpid-dispatch-router\", :version=\"1.3.0-SNAPSHOT\"}
                 # pull subtype's data out of fields. The fields list belongs to parent.
@@ -717,7 +785,20 @@ class ParsedLogLine(object):
         try:
             self.datetime = datetime.strptime(self.line[:26], '%Y-%m-%d %H:%M:%S.%f')
         except:
-            self.datetime = datetime(1970, 1, 1)
+            # old routers flub the timestamp and don't print leading zero in uS time
+            # 2018-11-18 11:31:08.269 should be 2018-11-18 11:31:08.000269
+            td = self.line[:26]
+            parts = td.split('.')
+            us = parts[1]
+            parts_us = us.split(' ')
+            if len(parts_us[0]) < 6:
+                parts_us[0] = '0' * (6 - len(parts_us[0])) + parts_us[0]
+            parts[1] = ' '.join(parts_us)
+            td = '.'.join(parts)
+            try:
+                self.datetime = datetime.strptime(td[:26], '%Y-%m-%d %H:%M:%S.%f')
+            except:
+                self.datetime = datetime(1970, 1, 1)
 
         # extract connection number
         sti = self.line.find(self.server_trace_key)
@@ -868,7 +949,7 @@ def parse_log_file(fn, log_index, comn):
                 try:
                     if lineno == 130:
                         pass
-                    do_this = comn.arg_index_data
+                    do_this = True if not hasattr(comn.args, 'skip_all_data') else not comn.args.skip_all_data
                     if not do_this:
                         # not indexing data. maybe do this line anyway
                         do_this = not any(s in line for s in [' @transfer', ' @disposition', ' @flow', 'EMPTY FRAME'])
@@ -894,7 +975,19 @@ def parse_log_file(fn, log_index, comn):
 
 
 if __name__ == "__main__":
+    print("Line-by-line split test")
+    try:
+        for line in td.TestData().data():
+            if "transfer" not in line:
+                print(proton_split(line))
+            else:
+                pass  # splitter does not split transfers
+        pass
+    except:
+        traceback.print_exc(file=sys.stdout)
+        pass
 
+    print("Canned data parse test")
     data = td.TestData().data()
     log_index = 0  # from file for router A
     instance = 0  # all from router instance 0
@@ -908,6 +1001,7 @@ if __name__ == "__main__":
         traceback.print_exc(file=sys.stdout)
         pass
 
+    print("Read two-instance file test")
     comn2 = common.Common()
     routers = parse_log_file('test_data/A-two-instances.log', 0, comn2)
     if len(routers) != 2:

http://git-wip-us.apache.org/repos/asf/qpid-dispatch/blob/75c9763a/bin/log_scraper/splitter.py
----------------------------------------------------------------------
diff --git a/bin/log_scraper/splitter.py b/bin/log_scraper/splitter.py
deleted file mode 100755
index a0353f7..0000000
--- a/bin/log_scraper/splitter.py
+++ /dev/null
@@ -1,94 +0,0 @@
-#!/usr/bin/env python
-
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-
-from __future__ import unicode_literals
-from __future__ import division
-from __future__ import absolute_import
-from __future__ import print_function
-
-import traceback
-import test_data as td
-
-
-class Splitter():
-    @staticmethod
-    def split(line):
-        """
-        Split a log line into fields.
-         * allow commas and spaces in quoted strings.
-         * split on ', ' and on ' '.
-           strip trailing commas between fields.
-         * quoted fields must have both quotes
-        :param line:
-        :return:
-        """
-        result = []
-        indqs = False
-        pending_comma = False
-        res = ""
-        for i in range(len(line)):
-            c = line[i]
-            if c == '\"':
-                if pending_comma:
-                    res += ','
-                    pending_comma = False
-                indqs = not indqs
-                res += c
-            elif c == ',':
-                if pending_comma:
-                    res += c
-                pending_comma = True
-            elif c == ' ':
-                if indqs:
-                    if pending_comma:
-                        res += ','
-                        pending_comma = False
-                    res += c
-                else:
-                    if res != '':
-                        if pending_comma:
-                            pending_comma = False
-                        result.append(res)
-                        res = ''
-            else:
-                res += c
-        if res != '':
-            result.append(str(res))
-        if indqs:
-            raise ValueError("SPLIT ODD QUOTES: %s", line)
-        # print ("SPLIT: line: %s" % line)
-        # print ("SPLIT: flds: %s" % result)
-        return result
-
-
-if __name__ == "__main__":
-
-    try:
-        for line in td.TestData().data():
-            if "transfer" not in line:
-                print(Splitter.split(line))
-                print()
-            else:
-                pass  # splitter does not split transfers
-        pass
-    except:
-        traceback.print_exc(file=sys.stdout)
-        pass

http://git-wip-us.apache.org/repos/asf/qpid-dispatch/blob/75c9763a/bin/log_scraper/test_data/test_data.txt
----------------------------------------------------------------------
diff --git a/bin/log_scraper/test_data/test_data.txt b/bin/log_scraper/test_data/test_data.txt
index 6c91258..f659a34 100644
--- a/bin/log_scraper/test_data/test_data.txt
+++ b/bin/log_scraper/test_data/test_data.txt
@@ -1,3 +1,5 @@
+2018-11-21 15:47:09.727570 -0500 SERVER (trace) [7]:1 <- @attach(18) [name="qpid-jms:sender:ID:23d7f58d-9bb1-4a8a-9701-d6eb7f7ec15e:1:1:1:test.Q0", handle=0, role=false, snd-settle-mode=0, rcv-settle-mode=0, source=@source(40) [address="ID:23d7f58d-9cc1-4a8a-9701-d6eb7f7ec15e:1:1:1", durable=0, expiry-policy=:"session-end", timeout=0, dynamic=false, outcomes=@PN_SYMBOL[:"amqp:accepted:list", :"amqp:rejected:list", :"amqp:released:list", :"amqp:modified:list"]], target=@target(41) [address="test.Q0", durable=0, expiry-policy=:"session-end", timeout=0, dynamic=false, capabilities=@PN_SYMBOL[:queue]], incomplete-unsettled=false, initial-delivery-count=0, desired-capabilities=@PN_SYMBOL[:"DELAYED_DELIVERY"]]
+2018-11-18 10:52:52.34008 -0500 SERVER (trace) [255]:2 <- @attach(18) [name="qpid-jms:receiver:ID:c50ab67b-0ff1-41fe-84a6-7a7bace101ec:16263:2:1:some-queue", handle=0, role=true, snd-settle-mode=0, rcv-settle-mode=0, source=@source(40) [address="some-queue", durable=0, expiry-policy=:"link-detach", timeout=0, dynamic=false, default-outcome=@modified(39) [delivery-failed=true], outcomes=@PN_SYMBOL[:"amqp:accepted:list", :"amqp:rejected:list", :"amqp:released:list", :"amqp:modified:list"], capabilities=@PN_SYMBOL[:queue]], target=@target(41) []]
 2018-07-20 10:58:40.176528 -0400 SERVER (trace) [2] Connecting to 127.0.0.1:23731 (/home/chug/git/qpid-dispatch/src/server.c:1052)
 2018-07-20 10:58:40.176628 -0400 SERVER (trace) [2]:  -> SASL (/home/chug/git/qpid-dispatch/src/server.c:106)
 2018-07-20 10:58:40.176841 -0400 SERVER (trace) [2]:  <- SASL (/home/chug/git/qpid-dispatch/src/server.c:106)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@qpid.apache.org
For additional commands, e-mail: commits-help@qpid.apache.org


Mime
View raw message