community-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From s...@apache.org
Subject svn commit: r1712089 - /comdev/reporter.apache.org/trunk/mailglomper2.py
Date Mon, 02 Nov 2015 17:16:06 GMT
Author: sebb
Date: Mon Nov  2 17:16:06 2015
New Revision: 1712089

URL: http://svn.apache.org/viewvc?rev=1712089&view=rev
Log:
Better interrupt handling:
- first interrupt is graceful stop (after current file)
- next interrupt stops abruptly, abandoning the current file

Modified:
    comdev/reporter.apache.org/trunk/mailglomper2.py

Modified: comdev/reporter.apache.org/trunk/mailglomper2.py
URL: http://svn.apache.org/viewvc/comdev/reporter.apache.org/trunk/mailglomper2.py?rev=1712089&r1=1712088&r2=1712089&view=diff
==============================================================================
--- comdev/reporter.apache.org/trunk/mailglomper2.py (original)
+++ comdev/reporter.apache.org/trunk/mailglomper2.py Mon Nov  2 17:16:06 2015
@@ -34,9 +34,18 @@ def tsprint(s): # print with timestamp
 interrupted = False
 
 def handle(signum, frame):
+    """
+    Handles signals, e.g. ^C (SIGINT) and kill (SIGTERM)
+    Sets the interrupted flag on first signal (graceful shutdown)
+    Raises KeyboardInterrupt on second signal (abrupt shutdown)
+    """
     global interrupted # otherwise handler does not set the same variable
-    interrupted = True
+    if signum == 2:
+        print("") # ensure newline after ^C
     tsprint("Interrupted with %d" % signum)
+    if interrupted: # second interrupt
+        raise KeyboardInterrupt # not generated by Python because we catch its signal
+    interrupted = True
 
 tsprint("Start")
 
@@ -130,7 +139,11 @@ def weekly_stats(ml, date):
     stamp, mldata = urlutils.getIfNewer(url, stampold) # read binary URL
 
     if mldata: # we have a new/updated file to process
-        tsprint("Processing new/updated version of %s (%s > %s)" % (fname, stamp, stampold))
+        try:
+            length = mldata.headers['Content-Length']
+        except:
+            length = 'unknown'
+        tsprint("Processing %s (%s > %s) Length: %s" % (fname, stamp, stampold, length))
         ct = 0
         weekly = {}
         l = 0
@@ -166,7 +179,8 @@ def add_weeks(total, add):
             total[e] = add[e]
 
 tsprint("Started")
-signal.signal(signal.SIGINT, handle)
+
+signal.signal(signal.SIGINT, handle) # This stops Python from raising KeyboardInterrupt
 signal.signal(signal.SIGTERM, handle)
 
 lastCheckpoint = time.time() # when output files were last saved
@@ -179,26 +193,38 @@ for mlist in re.finditer(r"<a href='([-a
     mls[ml]['weekly'] = {}
 
     mlct = 0
-    for date in months:
-        try:
-            ct, weeks = weekly_stats(ml, date)
-            add_weeks(mls[ml]['weekly'], weeks)
-            for week in weeks:
-                if week >= after:
-                    mls[ml]['quarterly'][0] += weeks[week]
-                elif week >= wayafter:
-                    mls[ml]['quarterly'][1] += weeks[week]
-            tsprint("Debug: %s %s: has %u mails" % (ml, date, ct)) # total for month
-            mlct += ct
-        except urllib.error.HTTPError as err:
-            if err.code == 404:
-                tsprint("Warn: could not open %s-%s - %s" % (ml, date, err.reason))
-            else:
+    try:
+        for date in months:
+            key = ml + "-" + date
+            try:
+                begin = time.time()
+                ct, weeks = weekly_stats(ml, date)
+                add_weeks(mls[ml]['weekly'], weeks)
+                for week in weeks:
+                    if week >= after:
+                        mls[ml]['quarterly'][0] += weeks[week]
+                    elif week >= wayafter:
+                        mls[ml]['quarterly'][1] += weeks[week]
+                tsprint("Debug: %s has %u mails (%u)" % (key, ct, time.time() - begin)) # total for month
+                mlct += ct
+            except urllib.error.HTTPError as err:
+                if err.code == 404: # Can happen for new lists
+                    tsprint("Warn: could not open %s - %s" % (key, err.reason))
+                else:
+                    tsprint(err)
+            except KeyboardInterrupt: # intercept the handlers signal so we can report it
+                tsprint("Interrupted processing of %s" % key)
+                raise # propagate, so does not get confused with graceful stop
+            except Exception as err:
                 tsprint(err)
-        except Exception as err:
-            tsprint(err)
-        if interrupted:
-            break
+            if interrupted: # break at end of file
+                tsprint("Stopping after processing %s" % key)
+                break
+    except KeyboardInterrupt: # catch the handlers signal
+        tsprint("Interrupted processing of %s" % ml)
+
+    if interrupted:
+        break
 
     tsprint("Info: %s has %u mails (%u secs)" % (ml, mlct, time.time() - start)) # total for mail group
     now = time.time()
@@ -210,9 +236,6 @@ for mlist in re.finditer(r"<a href='([-a
         with open(__MAILDATA_CACHE,"w") as f:
             json.dump(mldcache, f, indent=1) # sort_keys is expensive
 
-    if interrupted:
-        break
-
 tsprint("Completed scanning, writing JSON files (%s)" % str(interrupted))
 with open(__MAILDATA_EXTENDED,'w+') as f:
     json.dump(mls, f, indent=1, sort_keys=True)



Mime
View raw message