ponymail-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From humbed...@apache.org
Subject [4/4] incubator-ponymail git commit: Add tool for retrying failed documents saved by --dumponfail
Date Wed, 03 Jan 2018 00:34:36 GMT
Add tool for retrying failed documents saved by --dumponfail

This tool can retry pushing documents to ES if they
previously failed and were saved by --dumponfail.
This fixes #423.


Project: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/commit/3629f348
Tree: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/tree/3629f348
Diff: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/diff/3629f348

Branch: refs/heads/master
Commit: 3629f348b28b07a3c46fa0ced83d1fcbe4181c96
Parents: 1b008a6
Author: Daniel Gruno <humbedooh@apache.org>
Authored: Wed Jan 3 01:33:49 2018 +0100
Committer: Daniel Gruno <humbedooh@apache.org>
Committed: Wed Jan 3 01:33:49 2018 +0100

----------------------------------------------------------------------
 CHANGELOG.md           |  1 +
 tools/push-failures.py | 99 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 100 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-ponymail/blob/3629f348/CHANGELOG.md
----------------------------------------------------------------------
diff --git a/CHANGELOG.md b/CHANGELOG.md
index f79f9e4..a76b36d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,5 @@
 ## CHANGES in 0.10:
+- Enh: Optionally dump JSON documents to disk if push to ES fails (#423).
 - Bug: GUI can hide reply text when analysing quoted text (#421)
 - mbox.lua output filename does not match ISO standard (#420)
 - Added empty line quotes to JS compaction feature

http://git-wip-us.apache.org/repos/asf/incubator-ponymail/blob/3629f348/tools/push-failures.py
----------------------------------------------------------------------
diff --git a/tools/push-failures.py b/tools/push-failures.py
new file mode 100644
index 0000000..d3c22c8
--- /dev/null
+++ b/tools/push-failures.py
@@ -0,0 +1,99 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+""" Utility for retrying docs that we failed to index earlier.
+"""
+
+import sys
+import time
+import configparser
+import argparse
+import json
+import os
+import certifi
+
+# elasticsearch is a third-party module; exit with a hint when it is missing.
+try:
+    from elasticsearch import Elasticsearch, helpers
+except ImportError:
+    print("Sorry, you need to install the elasticsearch module from pip first.")
+    sys.exit(-1)
+
+# Read the Pony Mail settings from ponymail.cfg in the working directory.
+config = configparser.RawConfigParser()
+config.read('ponymail.cfg')
+
+# Index name, plus the optional SSL switch and URL prefix (both have fallbacks).
+dbname = config.get("elasticsearch", "dbname")
+ssl = config.get("elasticsearch", "ssl", fallback="false").lower() == 'true'
+uri = config.get("elasticsearch", "uri", fallback="")
+
+# Connection details for the one Elasticsearch node named in the config.
+es_node = {
+    'host': config.get("elasticsearch", "hostname"),
+    'port': int(config.get("elasticsearch", "port")),
+    'use_ssl': ssl,
+    'url_prefix': uri,
+    'ca_certs': certifi.where(),
+}
+# Ask the client for up to 5 retries, also when a request times out.
+es = Elasticsearch([es_node], max_retries=5, retry_on_timeout=True)
+
+# Single option: the directory holding the JSON dumps written by --dumponfail.
+parser = argparse.ArgumentParser(description='Command line options.')
+parser.add_argument('--source', dest='dumpdir',
+                   help='Path to the directory containing the JSON documents that failed to index')
+args = parser.parse_args()
+
+# Fall back to the current directory when --source is omitted or empty.
+dumpDir = args.dumpdir or '.'
+
+files = [n for n in os.listdir(dumpDir) if n.endswith('.json') and os.path.isfile(os.path.join(dumpDir, n))]
+
+# Retry every dumped document. A dump file is removed only after all of its
+# parts were indexed; an exception from es.index() aborts the run and leaves
+# the current file (and all remaining ones) on disk for a later attempt.
+pushed = 0
+for fname in files:
+    fpath = os.path.join(dumpDir, fname)
+    print("Processing %s" % fpath)
+    with open(fpath, "r") as fh:
+        ojson = json.load(fh)
+    if 'mbox' not in ojson or 'mbox_source' not in ojson:
+        # Incomplete dump: keep the file instead of silently deleting it.
+        print("Skipping %s: no mbox/mbox_source entry found" % fpath)
+        continue
+    # The 'mbox' and 'mbox_source' documents are stored under the same id.
+    es.index(
+        index=dbname,
+        doc_type="mbox",
+        id=ojson['id'],
+        body=ojson['mbox']
+    )
+    es.index(
+        index=dbname,
+        doc_type="mbox_source",
+        id=ojson['id'],
+        body=ojson['mbox_source']
+    )
+    # Each attachment is indexed under its own key as {'source': <value>}.
+    for k, v in (ojson.get('attachments') or {}).items():
+        es.index(index=dbname, doc_type="attachment", id=k, body={'source': v})
+    # Everything was pushed, so the dump is no longer needed.
+    os.unlink(fpath)
+    pushed += 1
+print("All done! Pushed %u documents to ES." % pushed)


Mime
View raw message