incubator-allura-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tvansteenbu...@apache.org
Subject [04/14] git commit: [#6480] Add trac ticket importer plugin
Date Wed, 07 Aug 2013 13:36:57 GMT
[#6480] Add trac ticket importer plugin

Signed-off-by: Tim Van Steenburgh <tvansteenburgh@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/incubator-allura/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-allura/commit/bc49a02b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-allura/tree/bc49a02b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-allura/diff/bc49a02b

Branch: refs/heads/tv/6480
Commit: bc49a02bc6bcd6501d7c1f75a6b894706a0c6e31
Parents: c192a84
Author: Tim Van Steenburgh <tvansteenburgh@gmail.com>
Authored: Fri Aug 2 17:00:29 2013 +0000
Committer: Tim Van Steenburgh <tvansteenburgh@gmail.com>
Committed: Wed Aug 7 12:26:52 2013 +0000

----------------------------------------------------------------------
 Allura/allura/scripts/trac_export.py            | 280 +++++++++++++++++++
 ForgeImporters/forgeimporters/trac/__init__.py  |  17 ++
 .../trac/templates/tickets/index.html           |  42 +++
 ForgeImporters/forgeimporters/trac/tickets.py   | 107 +++++++
 .../forgetracker/scripts/import_tracker.py      |  18 +-
 scripts/trac_export.py                          | 257 +----------------
 6 files changed, 458 insertions(+), 263 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/bc49a02b/Allura/allura/scripts/trac_export.py
----------------------------------------------------------------------
diff --git a/Allura/allura/scripts/trac_export.py b/Allura/allura/scripts/trac_export.py
new file mode 100644
index 0000000..aeb14ea
--- /dev/null
+++ b/Allura/allura/scripts/trac_export.py
@@ -0,0 +1,280 @@
+#!/usr/bin/env python
+
+#       Licensed to the Apache Software Foundation (ASF) under one
+#       or more contributor license agreements.  See the NOTICE file
+#       distributed with this work for additional information
+#       regarding copyright ownership.  The ASF licenses this file
+#       to you under the Apache License, Version 2.0 (the
+#       "License"); you may not use this file except in compliance
+#       with the License.  You may obtain a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#       Unless required by applicable law or agreed to in writing,
+#       software distributed under the License is distributed on an
+#       "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+#       KIND, either express or implied.  See the License for the
+#       specific language governing permissions and limitations
+#       under the License.
+
+
+import sys
+import csv
+import urlparse
+import urllib2
+import json
+import time
+import re
+from optparse import OptionParser
+from itertools import islice
+from datetime import datetime
+
+import feedparser
+from html2text import html2text
+from BeautifulSoup import BeautifulSoup, NavigableString
+import dateutil.parser
+import pytz
+
+
+def parse_options():
+    optparser = OptionParser(usage=''' %prog <Trac URL>
+
+Export ticket data from a Trac instance''')
+    optparser.add_option('-o', '--out-file', dest='out_filename', help='Write to file (default stdout)')
+    optparser.add_option('--no-attachments', dest='do_attachments', action='store_false', default=True, help='Export attachment info')
+    optparser.add_option('--only-tickets', dest='only_tickets', action='store_true', help='Export only ticket list')
+    optparser.add_option('--start', dest='start_id', type='int', default=1, help='Start with given ticket numer (or next accessible)')
+    optparser.add_option('--limit', dest='limit', type='int', default=None, help='Limit number of tickets')
+    optparser.add_option('-v', '--verbose', dest='verbose', action='store_true', help='Verbose operation')
+    options, args = optparser.parse_args()
+    if len(args) != 1:
+        optparser.error("Wrong number of arguments.")
+    return options, args
+
+
+class TracExport(object):
+
+    PAGE_SIZE = 100
+    TICKET_URL = 'ticket/%d'
+    QUERY_MAX_ID_URL  = 'query?col=id&order=id&desc=1&max=2'
+    QUERY_BY_PAGE_URL = 'query?col=id&col=time&col=changetime&order=id&max=' + str(PAGE_SIZE)+ '&page=%d'
+    ATTACHMENT_LIST_URL = 'attachment/ticket/%d/'
+    ATTACHMENT_URL = 'raw-attachment/ticket/%d/%s'
+
+    FIELD_MAP = {
+        'reporter': 'submitter',
+        'owner': 'assigned_to',
+    }
+
+    def __init__(self, base_url, start_id=1):
+        """start_id - start with at least that ticket number (actual returned
+                      ticket may have higher id if we don't have access to exact
+                      one).
+        """
+        self.base_url = base_url.rstrip('/') + '/'
+        # Contains additional info for a ticket which cannot
+        # be get with single-ticket export (create/mod times is
+        # and example).
+        self.ticket_map = {}
+        self.start_id = start_id
+        self.page = (start_id - 1) / self.PAGE_SIZE + 1
+        self.ticket_queue = self.next_ticket_ids()
+
+    def remap_fields(self, dict):
+        "Remap fields to adhere to standard taxonomy."
+        out = {}
+        for k, v in dict.iteritems():
+            out[self.FIELD_MAP.get(k, k)] = v
+
+        out['id'] = int(out['id'])
+        if 'private' in out:
+            out['private'] = bool(int(out['private']))
+        return out
+
+    def full_url(self, suburl, type=None):
+        url = urlparse.urljoin(self.base_url, suburl)
+        if type is None:
+            return url
+        glue = '&' if '?' in suburl else '?'
+        return  url + glue + 'format=' + type
+
+    @staticmethod
+    def log_url(url):
+        if options.verbose:
+            print >>sys.stderr, url
+
+    @classmethod
+    def trac2z_date(cls, s):
+        d = dateutil.parser.parse(s)
+        d = d.astimezone(pytz.UTC)
+        return d.strftime("%Y-%m-%dT%H:%M:%SZ")
+
+    @staticmethod
+    def match_pattern(regexp, string):
+        m = re.match(regexp, string)
+        assert m
+        return m.group(1)
+
+    def csvopen(self, url):
+        self.log_url(url)
+        f = urllib2.urlopen(url)
+        # Trac doesn't throw 403 error, just shows normal 200 HTML page
+        # telling that access denied. So, we'll emulate 403 ourselves.
+        # TODO: currently, any non-csv result treated as 403.
+        if not f.info()['Content-Type'].startswith('text/csv'):
+            raise urllib2.HTTPError(url, 403, 'Forbidden - emulated', f.info(), f)
+        return f
+
+    def parse_ticket_body(self, id):
+        # Use CSV export to get ticket fields
+        url = self.full_url(self.TICKET_URL % id, 'csv')
+        f = self.csvopen(url)
+        reader = csv.DictReader(f)
+        ticket_fields = reader.next()
+        ticket_fields['class'] = 'ARTIFACT'
+        return self.remap_fields(ticket_fields)
+
+    def parse_ticket_comments(self, id):
+        # Use RSS export to get ticket comments
+        url = self.full_url(self.TICKET_URL % id, 'rss')
+        self.log_url(url)
+        d = feedparser.parse(url)
+        res = []
+        for comment in d['entries']:
+            c = {}
+            c['submitter'] = comment.author
+            c['date'] = comment.updated_parsed
+            c['comment'] = html2text(comment.summary)
+            c['class'] = 'COMMENT'
+            res.append(c)
+        return res
+
+    def parse_ticket_attachments(self, id):
+        SIZE_PATTERN = r'(\d+) bytes'
+        TIMESTAMP_PATTERN = r'(.+) in Timeline'
+        # Scrape HTML to get ticket attachments
+        url = self.full_url(self.ATTACHMENT_LIST_URL % id)
+        self.log_url(url)
+        f = urllib2.urlopen(url)
+        soup = BeautifulSoup(f)
+        attach = soup.find('div', id='attachments')
+        list = []
+        while attach:
+            attach = attach.findNext('dt')
+            if not attach:
+                break
+            d = {}
+            d['filename'] = attach.a['href'].rsplit('/', 1)[1]
+            d['url'] = self.full_url(self.ATTACHMENT_URL % (id, d['filename']))
+            size_s = attach.span['title']
+            d['size'] = int(self.match_pattern(SIZE_PATTERN, size_s))
+            timestamp_s = attach.find('a', {'class': 'timeline'})['title']
+            d['date'] = self.trac2z_date(self.match_pattern(TIMESTAMP_PATTERN, timestamp_s))
+            d['by'] = attach.find(text=re.compile('added by')).nextSibling.renderContents()
+            d['description'] = ''
+            # Skip whitespace
+            while attach.nextSibling and type(attach.nextSibling) is NavigableString:
+                attach = attach.nextSibling
+            # if there's a description, there will be a <dd> element, other immediately next <dt>
+            if attach.nextSibling and attach.nextSibling.name == 'dd':
+                desc_el = attach.nextSibling
+                if desc_el:
+                    # TODO: Convert to Allura link syntax as needed
+                    d['description'] = ''.join(desc_el.findAll(text=True)).strip()
+            list.append(d)
+        return list
+
+    def get_max_ticket_id(self):
+        url = self.full_url(self.QUERY_MAX_ID_URL, 'csv')
+        f = self.csvopen(url)
+        reader = csv.DictReader(f)
+        fields = reader.next()
+        print fields
+        return int(fields['id'])
+
+    def get_ticket(self, id, extra={}):
+        '''Get ticket with given id
+        extra: extra fields to add to ticket (parsed elsewhere)
+        '''
+        t = self.parse_ticket_body(id)
+        t['comments'] = self.parse_ticket_comments(id)
+        if options.do_attachments:
+            atts = self.parse_ticket_attachments(id)
+            if atts:
+                t['attachments'] = atts
+        t.update(extra)
+        return t
+
+    def next_ticket_ids(self):
+        'Go thru ticket list and collect available ticket ids.'
+        # We could just do CSV export, which by default dumps entire list
+        # Alas, for many busy servers with long ticket list, it will just
+        # time out. So, let's paginate it instead.
+        res = []
+
+        url = self.full_url(self.QUERY_BY_PAGE_URL % self.page, 'csv')
+        try:
+            f = self.csvopen(url)
+        except urllib2.HTTPError, e:
+            if 'emulated' in e.msg:
+                body = e.fp.read()
+                if 'beyond the number of pages in the query' in body or 'Log in with a SourceForge account' in body:
+                    raise StopIteration
+            raise
+        reader = csv.reader(f)
+        cols = reader.next()
+        for r in reader:
+            if r and r[0].isdigit():
+                id = int(r[0])
+                extra = {'date': self.trac2z_date(r[1]), 'date_updated': self.trac2z_date(r[2])}
+                res.append((id, extra))
+        self.page += 1
+
+        return res
+
+    def __iter__(self):
+        return self
+
+    def next(self):
+        while True:
+            # queue empty, try to fetch more
+            if len(self.ticket_queue) == 0:
+                self.ticket_queue = self.next_ticket_ids()
+            # there aren't any more, we're really done
+            if len(self.ticket_queue) == 0:
+                raise StopIteration
+            id, extra = self.ticket_queue.pop(0)
+            if id >= self.start_id:
+                break
+        return self.get_ticket(id, extra)
+
+
+class DateJSONEncoder(json.JSONEncoder):
+    def default(self, obj):
+        if isinstance(obj, time.struct_time):
+            return time.strftime('%Y-%m-%dT%H:%M:%SZ', obj)
+        return json.JSONEncoder.default(self, obj)
+
+
+def main():
+    options, args = parse_options()
+    ex = TracExport(args[0], start_id=options.start_id)
+    # Implement iterator sequence limiting using islice()
+    doc = [t for t in islice(ex, options.limit)]
+
+    if not options.only_tickets:
+        doc = {
+            'class': 'PROJECT',
+            'trackers': {'default': {'artifacts': doc}}
+        }
+
+    out_file = sys.stdout
+    if options.out_filename:
+        out_file = open(options.out_filename, 'w')
+    out_file.write(json.dumps(doc, cls=DateJSONEncoder, indent=2, sort_keys=True))
+    # It's bad habit not to terminate lines
+    out_file.write('\n')
+
+
+if __name__ == '__main__':
+    main()

http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/bc49a02b/ForgeImporters/forgeimporters/trac/__init__.py
----------------------------------------------------------------------
diff --git a/ForgeImporters/forgeimporters/trac/__init__.py b/ForgeImporters/forgeimporters/trac/__init__.py
new file mode 100644
index 0000000..77505f1
--- /dev/null
+++ b/ForgeImporters/forgeimporters/trac/__init__.py
@@ -0,0 +1,17 @@
+#       Licensed to the Apache Software Foundation (ASF) under one
+#       or more contributor license agreements.  See the NOTICE file
+#       distributed with this work for additional information
+#       regarding copyright ownership.  The ASF licenses this file
+#       to you under the Apache License, Version 2.0 (the
+#       "License"); you may not use this file except in compliance
+#       with the License.  You may obtain a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#       Unless required by applicable law or agreed to in writing,
+#       software distributed under the License is distributed on an
+#       "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+#       KIND, either express or implied.  See the License for the
+#       specific language governing permissions and limitations
+#       under the License.
+

http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/bc49a02b/ForgeImporters/forgeimporters/trac/templates/tickets/index.html
----------------------------------------------------------------------
diff --git a/ForgeImporters/forgeimporters/trac/templates/tickets/index.html b/ForgeImporters/forgeimporters/trac/templates/tickets/index.html
new file mode 100644
index 0000000..eaf9aac
--- /dev/null
+++ b/ForgeImporters/forgeimporters/trac/templates/tickets/index.html
@@ -0,0 +1,42 @@
+{#-
+       Licensed to the Apache Software Foundation (ASF) under one
+       or more contributor license agreements.  See the NOTICE file
+       distributed with this work for additional information
+       regarding copyright ownership.  The ASF licenses this file
+       to you under the Apache License, Version 2.0 (the
+       "License"); you may not use this file except in compliance
+       with the License.  You may obtain a copy of the License at
+
+         http://www.apache.org/licenses/LICENSE-2.0
+
+       Unless required by applicable law or agreed to in writing,
+       software distributed under the License is distributed on an
+       "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+       KIND, either express or implied.  See the License for the
+       specific language governing permissions and limitations
+       under the License.
+-#}
+{% extends g.theme.master %}
+
+{% block title %}
+{{c.project.name}} / Import Trac Tickets
+{% endblock %}
+
+{% block header %}
+Import tickets from Trac
+{% endblock %}
+
+{% block content %}
+<form action="create" method="post" class="pad">
+  <label for="trac_url">URL of the Trac instance</label>
+  <input name="trac_url" />
+
+  <label for="mount_label">Label</label>
+  <input name="mount_label" value="Source" />
+
+  <label for="mount_point">Mount Point</label>
+  <input name="mount_point" value="source" />
+
+  <input type="submit" />
+</form>
+{% endblock %}

http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/bc49a02b/ForgeImporters/forgeimporters/trac/tickets.py
----------------------------------------------------------------------
diff --git a/ForgeImporters/forgeimporters/trac/tickets.py b/ForgeImporters/forgeimporters/trac/tickets.py
new file mode 100644
index 0000000..cc31741
--- /dev/null
+++ b/ForgeImporters/forgeimporters/trac/tickets.py
@@ -0,0 +1,107 @@
+#       Licensed to the Apache Software Foundation (ASF) under one
+#       or more contributor license agreements.  See the NOTICE file
+#       distributed with this work for additional information
+#       regarding copyright ownership.  The ASF licenses this file
+#       to you under the Apache License, Version 2.0 (the
+#       "License"); you may not use this file except in compliance
+#       with the License.  You may obtain a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#       Unless required by applicable law or agreed to in writing,
+#       software distributed under the License is distributed on an
+#       "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+#       KIND, either express or implied.  See the License for the
+#       specific language governing permissions and limitations
+#       under the License.
+
+from datetime import (
+        datetime,
+        timedelta,
+        )
+import json
+
+import formencode as fe
+from formencode import validators as fev
+
+from pylons import tmpl_context as c
+from pylons import app_globals as g
+from tg import (
+        config,
+        expose,
+        redirect,
+        validate,
+        )
+from tg.decorators import (
+        with_trailing_slash,
+        without_trailing_slash,
+        )
+
+from allura.controllers import BaseController
+from allura.lib.decorators import require_post
+from allura.lib.import_api import AlluraImportApiClient
+from allura.model import ApiTicket
+from allura.scripts.trac_export import (
+        TracExport,
+        DateJSONEncoder,
+        )
+
+from forgeimporters.base import ToolImporter
+from forgetracker.tracker_main import ForgeTrackerApp
+from forgetracker.script.import_tracker import import_tracker
+
+
+class TracTicketImportSchema(fe.Schema):
+    trac_url = fev.URL(not_empty=True)
+    mount_point = fev.UnicodeString()
+    mount_label = fev.UnicodeString()
+
+
+class TracTicketImportController(BaseController):
+    @with_trailing_slash
+    @expose('jinja:forgeimporters.trac:templates/tickets/index.html')
+    def index(self, **kw):
+        return {}
+
+    @without_trailing_slash
+    @expose()
+    @require_post()
+    @validate(TracTicketImportSchema(), error_handler=index)
+    def create(self, trac_url, mount_point, mount_label, **kw):
+        app = TracTicketImporter.import_tool(c.project,
+                mount_point=mount_point,
+                mount_label=mount_label,
+                trac_url=trac_url,
+                user=c.user)
+        redirect(app.url())
+
+
+class TracTicketImporter(ToolImporter):
+    target_app = ForgeTrackerApp
+    source = 'Trac'
+    controller = TracTicketImportController
+    tool_label = 'Trac Ticket Importer'
+    tool_description = 'Import your tickets from Trac'
+
+    def import_tool(self, project=None, mount_point=None, mount_label=None,
+            trac_url=None, user=None):
+        """ Import Trac tickets into a new Allura Tracker tool.
+
+        """
+        mount_point = mount_point or 'tickets'
+        app = project.install_app(
+                'Tickets',
+                mount_point=mount_point,
+                mount_label=mount_label or 'Tickets',
+                )
+        export = TracExport(trac_url)
+        export_string = json.dumps(export, cls=DateJSONEncoder)
+        api_ticket = ApiTicket(user_id=user._id,
+                capabilities={"import": ["Projects", project.shortname]},
+                expires=datetime.utcnow() + timedelta(minutes=60))
+        cli = AlluraImportApiClient(config['base_url'], api_ticket.api_key,
+                api_ticket.secret_key, False)
+        import_tracker(cli, project.shortname, mount_point, {},
+                export_string, validate=False)
+        g.post_event('project_updated')
+        return app

http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/bc49a02b/ForgeTracker/forgetracker/scripts/import_tracker.py
----------------------------------------------------------------------
diff --git a/ForgeTracker/forgetracker/scripts/import_tracker.py b/ForgeTracker/forgetracker/scripts/import_tracker.py
index 506e771..32b4d1c 100644
--- a/ForgeTracker/forgetracker/scripts/import_tracker.py
+++ b/ForgeTracker/forgetracker/scripts/import_tracker.py
@@ -25,7 +25,8 @@ from allura.lib.import_api import AlluraImportApiClient
 
 log = logging.getLogger(__name__)
 
-def import_tracker(cli, project, tool, import_options, options, doc_txt, validate=True, verbose=False):
+def import_tracker(cli, project, tool, import_options, doc_txt,
+        validate=True, verbose=False, cont=False):
     url = '/rest/p/' + project + '/' + tool
     if validate:
         url += '/validate_import'
@@ -33,8 +34,8 @@ def import_tracker(cli, project, tool, import_options, options, doc_txt, validat
         url += '/perform_import'
 
     existing_map = {}
-    if options.cont:
-        existing_tickets = cli.call('/rest/p/' + options.project + '/' + options.tracker + '/')['tickets']
+    if cont:
+        existing_tickets = cli.call('/rest/p/' + project + '/' + tool + '/')['tickets']
         for t in existing_tickets:
             existing_map[t['ticket_num']] = t['summary']
 
@@ -46,12 +47,12 @@ def import_tracker(cli, project, tool, import_options, options, doc_txt, validat
     else:
         tickets_in = doc
 
-    if options.verbose:
+    if verbose:
         print "Processing %d tickets" % len(tickets_in)
 
     for cnt, ticket_in in enumerate(tickets_in):
         if ticket_in['id'] in existing_map:
-            if options.verbose:
+            if verbose:
                 print 'Ticket id %d already exists, skipping' % ticket_in['id']
             continue
         doc_import={}
@@ -60,7 +61,7 @@ def import_tracker(cli, project, tool, import_options, options, doc_txt, validat
         doc_import['trackers']['default']['artifacts'] = [ticket_in]
         res = cli.call(url, doc=json.dumps(doc_import), options=json.dumps(import_options))
         assert res['status'] and not res['errors']
-        if options.validate:
+        if validate:
             if res['warnings']:
                 print "Ticket id %s warnings: %s" % (ticket_in['id'], res['warnings'])
         else:
@@ -93,9 +94,10 @@ class ImportTracker(ScriptTask):
         import_options['user_map'] = user_map
         cli = AlluraImportApiClient(options.base_url, options.api_key, options.secret_key, options.verbose)
         doc_txt = open(options.file_data).read()
-        import_tracker(cli, options.project, options.tracker, import_options, options, doc_txt,
+        import_tracker(cli, options.project, options.tracker, import_options, doc_txt,
                        validate=options.validate,
-                       verbose=options.verbose)
+                       verbose=options.verbose,
+                       cont=options.cont)
 
     @classmethod
     def parser(cls):

http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/bc49a02b/scripts/trac_export.py
----------------------------------------------------------------------
diff --git a/scripts/trac_export.py b/scripts/trac_export.py
index 002a1e8..ac90b17 100755
--- a/scripts/trac_export.py
+++ b/scripts/trac_export.py
@@ -17,259 +17,6 @@
 #       specific language governing permissions and limitations
 #       under the License.
 
-
-import sys
-import csv
-import urlparse
-import urllib2
-import json
-import time
-import re
-from optparse import OptionParser
-from itertools import islice
-from datetime import datetime
-
-import feedparser
-from html2text import html2text
-from BeautifulSoup import BeautifulSoup, NavigableString
-import dateutil.parser
-import pytz
-
-
-def parse_options():
-    optparser = OptionParser(usage=''' %prog <Trac URL>
-
-Export ticket data from a Trac instance''')
-    optparser.add_option('-o', '--out-file', dest='out_filename', help='Write to file (default stdout)')
-    optparser.add_option('--no-attachments', dest='do_attachments', action='store_false', default=True, help='Export attachment info')
-    optparser.add_option('--only-tickets', dest='only_tickets', action='store_true', help='Export only ticket list')
-    optparser.add_option('--start', dest='start_id', type='int', default=1, help='Start with given ticket numer (or next accessible)')
-    optparser.add_option('--limit', dest='limit', type='int', default=None, help='Limit number of tickets')
-    optparser.add_option('-v', '--verbose', dest='verbose', action='store_true', help='Verbose operation')
-    options, args = optparser.parse_args()
-    if len(args) != 1:
-        optparser.error("Wrong number of arguments.")
-    return options, args
-
-
-class TracExport(object):
-
-    PAGE_SIZE = 100
-    TICKET_URL = 'ticket/%d'
-    QUERY_MAX_ID_URL  = 'query?col=id&order=id&desc=1&max=2'
-    QUERY_BY_PAGE_URL = 'query?col=id&col=time&col=changetime&order=id&max=' + str(PAGE_SIZE)+ '&page=%d'
-    ATTACHMENT_LIST_URL = 'attachment/ticket/%d/'
-    ATTACHMENT_URL = 'raw-attachment/ticket/%d/%s'
-
-    FIELD_MAP = {
-        'reporter': 'submitter',
-        'owner': 'assigned_to',
-    }
-
-    def __init__(self, base_url, start_id=1):
-        """start_id - start with at least that ticket number (actual returned
-                      ticket may have higher id if we don't have access to exact
-                      one).
-        """
-        self.base_url = base_url.rstrip('/') + '/'
-        # Contains additional info for a ticket which cannot
-        # be get with single-ticket export (create/mod times is
-        # and example).
-        self.ticket_map = {}
-        self.start_id = start_id
-        self.page = (start_id - 1) / self.PAGE_SIZE + 1
-        self.ticket_queue = self.next_ticket_ids()
-
-    def remap_fields(self, dict):
-        "Remap fields to adhere to standard taxonomy."
-        out = {}
-        for k, v in dict.iteritems():
-            out[self.FIELD_MAP.get(k, k)] = v
-
-        out['id'] = int(out['id'])
-        if 'private' in out:
-            out['private'] = bool(int(out['private']))
-        return out
-
-    def full_url(self, suburl, type=None):
-        url = urlparse.urljoin(self.base_url, suburl)
-        if type is None:
-            return url
-        glue = '&' if '?' in suburl else '?'
-        return  url + glue + 'format=' + type
-
-    @staticmethod
-    def log_url(url):
-        if options.verbose:
-            print >>sys.stderr, url
-
-    @classmethod
-    def trac2z_date(cls, s):
-        d = dateutil.parser.parse(s)
-        d = d.astimezone(pytz.UTC)
-        return d.strftime("%Y-%m-%dT%H:%M:%SZ")
-
-    @staticmethod
-    def match_pattern(regexp, string):
-        m = re.match(regexp, string)
-        assert m
-        return m.group(1)
-
-    def csvopen(self, url):
-        self.log_url(url)
-        f = urllib2.urlopen(url)
-        # Trac doesn't throw 403 error, just shows normal 200 HTML page
-        # telling that access denied. So, we'll emulate 403 ourselves.
-        # TODO: currently, any non-csv result treated as 403.
-        if not f.info()['Content-Type'].startswith('text/csv'):
-            raise urllib2.HTTPError(url, 403, 'Forbidden - emulated', f.info(), f)
-        return f
-
-    def parse_ticket_body(self, id):
-        # Use CSV export to get ticket fields
-        url = self.full_url(self.TICKET_URL % id, 'csv')
-        f = self.csvopen(url)
-        reader = csv.DictReader(f)
-        ticket_fields = reader.next()
-        ticket_fields['class'] = 'ARTIFACT'
-        return self.remap_fields(ticket_fields)
-
-    def parse_ticket_comments(self, id):
-        # Use RSS export to get ticket comments
-        url = self.full_url(self.TICKET_URL % id, 'rss')
-        self.log_url(url)
-        d = feedparser.parse(url)
-        res = []
-        for comment in d['entries']:
-            c = {}
-            c['submitter'] = comment.author
-            c['date'] = comment.updated_parsed
-            c['comment'] = html2text(comment.summary)
-            c['class'] = 'COMMENT'
-            res.append(c)
-        return res
-
-    def parse_ticket_attachments(self, id):
-        SIZE_PATTERN = r'(\d+) bytes'
-        TIMESTAMP_PATTERN = r'(.+) in Timeline'
-        # Scrape HTML to get ticket attachments
-        url = self.full_url(self.ATTACHMENT_LIST_URL % id)
-        self.log_url(url)
-        f = urllib2.urlopen(url)
-        soup = BeautifulSoup(f)
-        attach = soup.find('div', id='attachments')
-        list = []
-        while attach:
-            attach = attach.findNext('dt')
-            if not attach:
-                break
-            d = {}
-            d['filename'] = attach.a['href'].rsplit('/', 1)[1]
-            d['url'] = self.full_url(self.ATTACHMENT_URL % (id, d['filename']))
-            size_s = attach.span['title']
-            d['size'] = int(self.match_pattern(SIZE_PATTERN, size_s))
-            timestamp_s = attach.find('a', {'class': 'timeline'})['title']
-            d['date'] = self.trac2z_date(self.match_pattern(TIMESTAMP_PATTERN, timestamp_s))
-            d['by'] = attach.find(text=re.compile('added by')).nextSibling.renderContents()
-            d['description'] = ''
-            # Skip whitespace
-            while attach.nextSibling and type(attach.nextSibling) is NavigableString:
-                attach = attach.nextSibling
-            # if there's a description, there will be a <dd> element, other immediately next <dt>
-            if attach.nextSibling and attach.nextSibling.name == 'dd':
-                desc_el = attach.nextSibling
-                if desc_el:
-                    # TODO: Convert to Allura link syntax as needed
-                    d['description'] = ''.join(desc_el.findAll(text=True)).strip()
-            list.append(d)
-        return list
-
-    def get_max_ticket_id(self):
-        url = self.full_url(self.QUERY_MAX_ID_URL, 'csv')
-        f = self.csvopen(url)
-        reader = csv.DictReader(f)
-        fields = reader.next()
-        print fields
-        return int(fields['id'])
-
-    def get_ticket(self, id, extra={}):
-        '''Get ticket with given id
-        extra: extra fields to add to ticket (parsed elsewhere)
-        '''
-        t = self.parse_ticket_body(id)
-        t['comments'] = self.parse_ticket_comments(id)
-        if options.do_attachments:
-            atts = self.parse_ticket_attachments(id)
-            if atts:
-                t['attachments'] = atts
-        t.update(extra)
-        return t
-
-    def next_ticket_ids(self):
-        'Go thru ticket list and collect available ticket ids.'
-        # We could just do CSV export, which by default dumps entire list
-        # Alas, for many busy servers with long ticket list, it will just
-        # time out. So, let's paginate it instead.
-        res = []
-
-        url = self.full_url(self.QUERY_BY_PAGE_URL % self.page, 'csv')
-        try:
-            f = self.csvopen(url)
-        except urllib2.HTTPError, e:
-            if 'emulated' in e.msg:
-                body = e.fp.read()
-                if 'beyond the number of pages in the query' in body or 'Log in with a SourceForge account' in body:
-                    raise StopIteration
-            raise
-        reader = csv.reader(f)
-        cols = reader.next()
-        for r in reader:
-            if r and r[0].isdigit():
-                id = int(r[0])
-                extra = {'date': self.trac2z_date(r[1]), 'date_updated': self.trac2z_date(r[2])}
-                res.append((id, extra))
-        self.page += 1
-
-        return res
-
-    def __iter__(self):
-        return self
-
-    def next(self):
-        while True:
-            # queue empty, try to fetch more
-            if len(self.ticket_queue) == 0:
-                self.ticket_queue = self.next_ticket_ids()
-            # there aren't any more, we're really done
-            if len(self.ticket_queue) == 0:
-                raise StopIteration
-            id, extra = self.ticket_queue.pop(0)
-            if id >= self.start_id:
-                break
-        return self.get_ticket(id, extra)
-
-
-class DateJSONEncoder(json.JSONEncoder):
-    def default(self, obj):
-        if isinstance(obj, time.struct_time):
-            return time.strftime('%Y-%m-%dT%H:%M:%SZ', obj)
-        return json.JSONEncoder.default(self, obj)
-
 if __name__ == '__main__':
-    options, args = parse_options()
-    ex = TracExport(args[0], start_id=options.start_id)
-    # Implement iterator sequence limiting using islice()
-    doc = [t for t in islice(ex, options.limit)]
-
-    if not options.only_tickets:
-        doc = {
-            'class': 'PROJECT',
-            'trackers': {'default': {'artifacts': doc}}
-        }
-
-    out_file = sys.stdout
-    if options.out_filename:
-        out_file = open(options.out_filename, 'w')
-    out_file.write(json.dumps(doc, cls=DateJSONEncoder, indent=2, sort_keys=True))
-    # It's bad habit not to terminate lines
-    out_file.write('\n')
+    from allura.scripts.trac_export import main
+    main()


Mime
View raw message