ponymail-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From humbed...@apache.org
Subject [01/11] incubator-ponymail git commit: add a synch ponymail with imap tool
Date Wed, 01 Jun 2016 12:41:03 GMT
Repository: incubator-ponymail
Updated Branches:
  refs/heads/master 3acd303a4 -> 43d70910c


add a synch ponymail with imap tool


Project: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/commit/cefa48d0
Tree: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/tree/cefa48d0
Diff: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/diff/cefa48d0

Branch: refs/heads/master
Commit: cefa48d0baef7a9cd37c53f9dcbc3e52670c329a
Parents: a383086
Author: Sam Ruby <rubys@intertwingly.net>
Authored: Tue May 3 14:21:49 2016 -0400
Committer: Sam Ruby <rubys@intertwingly.net>
Committed: Tue May 3 14:21:49 2016 -0400

----------------------------------------------------------------------
 tools/archiver.py  |  25 +++++--
 tools/sync-imap.py | 176 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 195 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-ponymail/blob/cefa48d0/tools/archiver.py
----------------------------------------------------------------------
diff --git a/tools/archiver.py b/tools/archiver.py
index ec1fef7..8cdbcbf 100644
--- a/tools/archiver.py
+++ b/tools/archiver.py
@@ -75,6 +75,7 @@ def parse_attachment(part):
         dispositions = cd.strip().split(";")
         if dispositions[0].lower() == "attachment":
             fd = part.get_payload(decode=True)
+            if not fd: return None, None
             attachment = {}
             attachment['content_type'] = part.get_content_type()
             attachment['size'] = len(fd)
@@ -83,7 +84,8 @@ def parse_attachment(part):
             b64 = codecs.encode(fd, "base64").decode('ascii', 'ignore')
             attachment['hash'] = h
             for param in dispositions[1:]:
-                key,val = param.split("=")
+                if not '=' in param: continue
+                key,val = param.split("=", 1)
                 if key.lower().strip() == "filename":
                     val = val.strip(' "')
                     print("Found attachment: %s" % val)
@@ -282,8 +284,9 @@ class Archiver(object):
                             body = body.encode('utf-8')
                     except:
                         body = None
-        if body:
-            attachments, contents = self.msgfiles(msg)
+
+        attachments, contents = self.msgfiles(msg)
+        if body or attachments:
             private = False
             if hasattr(mlist, 'archive_public') and mlist.archive_public == True:
                 private = False
@@ -293,7 +296,8 @@ class Archiver(object):
                 private = True
             pmid = mid
             try:
-                mid = "%s@%s@%s" % (hashlib.sha224(body if type(body) is bytes else body.encode('ascii', 'ignore')).hexdigest(), uid_mdate, lid)
+                mid = "%s@%s" % (hashlib.sha224(msg.as_bytes()).hexdigest(), lid)
+                print(mid)
             except Exception as err:
                 if logger:
                     logger.warn("Could not generate MID: %s" % err)
@@ -475,6 +479,8 @@ if __name__ == '__main__':
                        help='Use the archive timestamp as the email date instead of the Date header')
     parser.add_argument('--quiet', dest='quiet', action='store_true', 
                        help='Do not exit -1 if the email could not be parsed')
+    parser.add_argument('--verbose', dest='verbose', action='store_true', 
+                       help='Output additional log messages')
     parser.add_argument('--html2text', dest='html2text', action='store_true', 
                        help='Try to convert HTML to text if no text/plain message is found')
     args = parser.parse_args()
@@ -482,6 +488,10 @@ if __name__ == '__main__':
     if args.html2text:
         import html2text
         parseHTML = True
+
+    if args.verbose:
+        import logging
+        logging.basicConfig(stream=sys.stdout, level=logging.INFO)
         
     foo = Archiver()
     input_stream = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8', errors="ignore")
@@ -512,7 +522,7 @@ if __name__ == '__main__':
                     msg.replace_header('List-ID', msg.get(altheader))
                 except:
                     msg.add_header('list-id', msg.get(altheader))
-        
+
         # Set specific LID?
         if args.lid and len(args.lid[0]) > 3:
             try:
@@ -562,6 +572,9 @@ if __name__ == '__main__':
                 lid = foo.archive_message(msg_metadata, msg)
                 print("%s: Done archiving to %s!" % (email.utils.formatdate(), lid))
             except Exception as err:
+                if args.verbose:
+                    import traceback
+                    traceback.print_exc()
                 print("Archiving failed!: %s" % err)
                 raise Exception("Archiving to ES failed")
         else:
@@ -572,4 +585,4 @@ if __name__ == '__main__':
         else:
             print("Could not parse email: %s" % err)
             sys.exit(-1)
-            
\ No newline at end of file
+            

http://git-wip-us.apache.org/repos/asf/incubator-ponymail/blob/cefa48d0/tools/sync-imap.py
----------------------------------------------------------------------
diff --git a/tools/sync-imap.py b/tools/sync-imap.py
new file mode 100755
index 0000000..43d457d
--- /dev/null
+++ b/tools/sync-imap.py
@@ -0,0 +1,176 @@
+#!/usr/bin/env python3.4
+# -*- coding: utf-8 -*-
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+""" Synchronize ponymail with an imap server.
+
+Fetches message-ids from both ponymail and an imap server, and adds or
+deletes whatever is necessary from ponymail to make it match.
+
+See usage for instructions.
+
+"""
+
+import argparse
+import configparser
+import elasticsearch
+import imaplib
+import os
+import pwd
+import subprocess
+import sys
+import re
+
+# change working directory to location of this script, so that the relative paths 'ponymail.cfg' and 'archiver.py' used below resolve
+
+os.chdir(os.path.dirname(os.path.abspath(__file__)))
+
+# global defaults (may be overridden by ponymail.cfg, then by the command line)
+
+es_list = None  # required; only settable with --list
+imap_host = 'localhost'
+imap_port = 993
+imap_user = pwd.getpwuid(os.getuid()).pw_name  # login name of the user running this script
+imap_password = None  # required; from ponymail.cfg or --password
+imap_folder = 'INBOX'
+html2text = False
+verbose = False
+
+# fetch config overrides from ponymail.cfg (the [imap] section and each of its options are optional)
+
+config = configparser.RawConfigParser()
+config.read('ponymail.cfg')
+iname = config.get("elasticsearch", "dbname")
+if config.has_option('imap', 'host'):
+    imap_host = config.get('imap', 'host')
+if config.has_option('imap', 'port'):
+    imap_port = config.getint('imap', 'port')
+if config.has_option('imap', 'user'):
+    imap_user = config.get('imap', 'user')  # a string: getint() raises ValueError on non-numeric text
+if config.has_option('imap', 'password'):
+    imap_password = config.get('imap', 'password')  # likewise a string, not an integer
+
+# fetch command line argument overrides (applied after, and so taking precedence over, ponymail.cfg)
+
+parser = argparse.ArgumentParser(description='Command line options.')
+parser.add_argument('--list', dest='list', type=str, nargs=1,
+                   help='ElasticSearch list')
+parser.add_argument('--host', dest='host', type=str, nargs=1,
+                   help='IMAP host')
+parser.add_argument('--port', dest='port', type=int, nargs=1,
+                   help='IMAP port')
+parser.add_argument('--user', dest='user', type=str, nargs=1,
+                   help='IMAP user')
+parser.add_argument('--password', dest='password', type=str, nargs=1,
+                   help='IMAP password')
+parser.add_argument('--folder', dest='folder', type=str, nargs=1,
+                   help='IMAP folder')
+parser.add_argument('--html2text', dest='html2text', action='store_true',
+                   help='Try to convert HTML to text if no text/plain message is found')
+parser.add_argument('--verbose', dest='verbose', action='store_true', 
+                   help='Output additional log messages')
+
+args = parser.parse_args()
+
+if args.list:  # nargs=1 makes each given option a one-element list, hence the [0]
+    es_list = args.list[0]
+if args.host:
+    imap_host = args.host[0]
+if args.port:
+    imap_port = args.port[0]
+if args.user:
+    imap_user = args.user[0]
+if args.password:
+    imap_password = args.password[0]
+if args.folder:
+    imap_folder = args.folder[0]
+if args.html2text:
+    html2text = True
+if args.verbose:
+    verbose = True
+
+if not es_list or not imap_password:  # both are mandatory and default to None
+    parser.print_help()
+    sys.exit(-1)
+
+es_list = "<%s>" % es_list.strip("<>") # We need <> around it!
+
+# fetch message-id => _id pairs from elasticsearch, one scroll page at a time
+
+es = elasticsearch.Elasticsearch()
+result = es.search(scroll = '5m', 
+    body = {
+        'size': 1024, 
+        'fields': ['message-id'], 
+        'query': {'match': {'list': es_list}}
+    }
+)  # NOTE(review): no index= is given here although the deletes below use index=iname -- confirm
+
+db = {}  # message-id => elasticsearch _id
+while len(result['hits']['hits']) > 0:  # stop at the first page that has no hits
+    for hit in result['hits']['hits']:
+        db[hit['fields']['message-id'][0]] = hit['_id']
+    result = es.scroll(scroll='5m', scroll_id=result['_scroll_id'])
+
+# fetch message-id => uid pairs from imap (the folder is opened read-only)
+
+imap = imaplib.IMAP4_SSL(imap_host, imap_port)
+imap.login(imap_user, imap_password)
+imap.select(imap_folder, readonly=True)
+results = imap.uid('search', None, 'ALL')
+uids = b','.join((results[1][0] or b'').split())  # imaplib yields [None] when no SEARCH data was sent
+results = imap.uid('fetch', uids, '(BODY[HEADER.FIELDS (MESSAGE-ID)])') if uids else ('OK', [])  # an empty uid set is not a valid FETCH
+
+mail = {}
+uid_re = re.compile(rb'^\d+ \(UID (\d+) BODY\[')  # raw literals: \d, \( and \s are regex escapes, not string escapes
+mid_re = re.compile(rb'^Message-ID:\s*(.*?)\s*$', re.I)
+uid = None
+for result in results[1]:
+    if not isinstance(result, tuple): continue  # only tuples carry message data; skip b')' and None items
+    for line in result:
+        match = uid_re.match(line)
+        if match:
+            uid = match.group(1)
+        else:
+            match = mid_re.match(line)
+            if match:
+                try:
+                    mail[match.group(1).decode('utf-8')] = uid
+                    uid = None
+                except ValueError:  # UnicodeDecodeError: the message-id is not valid UTF-8
+                    pass  # best effort: such a message is left out of the sync
+
+# remove from elasticsearch every document whose message-id is absent from imap
+
+for message_id, doc_id in db.items():
+    if message_id in mail: continue
+    for doc_type in ('mbox', 'mbox_source'):
+        es.delete(index=iname, id=doc_id, doc_type=doc_type)
+    print("deleted: " + message_id)
+
+# add new items to elasticsearch from imap by piping each raw message to archiver.py
+
+for mid, uid in mail.items():
+    if mid in db: continue
+    argv = [sys.executable, 'archiver.py', '--lid=%s' % es_list]
+    if verbose: argv.append('--verbose')
+    if html2text: argv.append('--html2text')
+    child = subprocess.Popen(argv, stdin=subprocess.PIPE)
+    # communicate() writes the message, closes stdin and waits; a child that exits early no longer aborts the sync
+    child.communicate(imap.uid('fetch', uid, '(RFC822)')[1][0][1])
+    print("inserted: %s, rc = %d" % (mid, child.returncode))
+imap.logout()  # all messages handled; close the IMAP connection instead of leaking it
+


Mime
View raw message