incubator-cvs mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From br...@apache.org
Subject svn commit: r1441314 - /incubator/public/trunk/voter/voter.py
Date Fri, 01 Feb 2013 04:25:37 GMT
Author: brane
Date: Fri Feb  1 04:25:36 2013
New Revision: 1441314

URL: http://svn.apache.org/viewvc?rev=1441314&view=rev
Log:
Parse last two months of archives, including compressed mboxes.
Incidentally, fixed a bug in the previous-month calculation and
drastically sped up mbox parsing.
Also fixed a bug in the pruning of obsolete closed votes.

Modified:
    incubator/public/trunk/voter/voter.py

Modified: incubator/public/trunk/voter/voter.py
URL: http://svn.apache.org/viewvc/incubator/public/trunk/voter/voter.py?rev=1441314&r1=1441313&r2=1441314&view=diff
==============================================================================
--- incubator/public/trunk/voter/voter.py (original)
+++ incubator/public/trunk/voter/voter.py Fri Feb  1 04:25:36 2013
@@ -40,6 +40,7 @@ import collections
 import datetime
 import email
 import email.utils
+import gzip
 import sqlite3
 
 sys.path.insert(0, os.path.dirname(__file__))
@@ -92,24 +93,32 @@ class MBoxParser(object):
         mbox.entries.append(cls.Entry(updated, subject))
 
     @classmethod
-    def parse(cls, mbox_path, mtime):
-        with open(mbox_path, 'rt') as mbox_file:
-            mbox = cls.MBox(mtime, [])
-            text = None
-            for line in mbox_file:
-                if line.startswith('From '):
-                    cls.__append_message(text, mbox)
-                    text = ''
-                text += line
-            cls.__append_message(text, mbox)
+    def __parse_file(cls, text, mtime):
+        mbox = cls.MBox(mtime, [])
+        start = 0
+        while start < len(text):
+            end = text.find('\nFrom ', start)
+            if 0 > end:
+                end = len(text)
+            cls.__append_message(text[start:end], mbox)
+            start = end + 1             # Skip the newline
         return mbox
 
+    @classmethod
+    def parse(cls, mbox_path, mtime):
+        if mbox_path.endswith('.gz'):
+            with gzip.open(mbox_path) as mbox_file:
+                text = mbox_file.read().replace('\r\n', '\n')
+                return cls.__parse_file(text, mtime)
+        else:
+            with open(mbox_path, 'rt') as mbox_file:
+                return cls.__parse_file(mbox_file.read(), mtime)
+
 
 class VoteUpdater(object):
     """
     TODO: Docstring.
     """
-
     __subject_rx = re.compile(
         # Skip anything before the first tag
         r'^[^[]*'
@@ -135,7 +144,7 @@ class VoteUpdater(object):
         now = datetime.datetime.utcnow()
         thismonth = datetime.datetime(now.year, now.month, 1)
         if now.month == 1:
-            lastmonth = datetime.datetime(now.year - 1, now.month, 1)
+            lastmonth = datetime.datetime(now.year - 1, 12, 1)
         else:
             lastmonth = datetime.datetime(now.year, now.month - 1, 1)
 
@@ -165,10 +174,6 @@ class VoteUpdater(object):
     ParsedMBox = collections.namedtuple('ParsedMBox', ('relpath', 'mtime'))
 
     def __parse_mbox(self, mbox_path, mtime, votes):
-        # TODO: Parse compressed mboxes
-        if mbox_path.endswith('.gz'):
-            return
-
         mbox = MBoxParser.parse(mbox_path, mtime)
         for e in mbox.entries:
             parsed = self.__subject_rx.match(e.title)
@@ -205,8 +210,6 @@ class VoteUpdater(object):
         timestamp = max(m for p, r, m in mboxes)
         feed_updated = datetime.datetime.utcfromtimestamp(timestamp)
 
-        # TODO: Filter duplicate votes, but return the first and
-        #       last in every duplicate list
         database.record_votes(feed_updated,
                               (database.Vote(subject = v.subject,
                                              updated = v.updated,
@@ -453,8 +456,8 @@ class VoteDatabase(object):
                 obsolete.append(row['sortkey'])
         if obsolete:
             with self.transaction() as txn:
-                txn.con.execute("DELETE FROM vote WHERE sortkey IN ?",
-                                (obsolete,))
+                txn.con.executemany("DELETE FROM vote WHERE sortkey = ?",
+                                    ((o,) for o in obsolete))
 
 
 def main():



---------------------------------------------------------------------
To unsubscribe, e-mail: cvs-unsubscribe@incubator.apache.org
For additional commands, e-mail: cvs-help@incubator.apache.org


Mime
View raw message