incubator-allura-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From john...@apache.org
Subject [03/23] git commit: [#6139] ticket:399 Improved Trac Wiki import scripts
Date Wed, 31 Jul 2013 19:14:23 GMT
[#6139] ticket:399 Improved Trac Wiki import scripts

* Copy the contents of the trac wiki home page to the Home page in the Allura wiki.
* Replace the wiki-toc div from trac with a Markdown [TOC] tag.
* Add the 'forgewiki.wiki_from_trac' ScriptTask that can do the export/import in one step.


Project: http://git-wip-us.apache.org/repos/asf/incubator-allura/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-allura/commit/fcf24fc2
Tree: http://git-wip-us.apache.org/repos/asf/incubator-allura/tree/fcf24fc2
Diff: http://git-wip-us.apache.org/repos/asf/incubator-allura/diff/fcf24fc2

Branch: refs/heads/cj/6461
Commit: fcf24fc2e4fd7aa2bbfbf70cd17d9ba4f9b351da
Parents: 70d4cdb
Author: Vlad Glushchuk <vgluschuk@gmail.com>
Authored: Fri Jul 26 10:15:21 2013 +0300
Committer: Tim Van Steenburgh <tvansteenburgh@gmail.com>
Committed: Tue Jul 30 19:29:23 2013 +0000

----------------------------------------------------------------------
 .../scripts/wiki_from_trac/__init__.py          |  18 ++
 .../scripts/wiki_from_trac/extractors.py        | 226 +++++++++++++++++++
 .../forgewiki/scripts/wiki_from_trac/loaders.py |  72 ++++++
 .../scripts/wiki_from_trac/wiki_from_trac.py    |  69 ++++++
 requirements-optional.txt                       |   3 +
 scripts/allura_import.py                        |   6 +-
 scripts/wiki-export.py                          |  58 +++++
 7 files changed, 450 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/fcf24fc2/ForgeWiki/forgewiki/scripts/wiki_from_trac/__init__.py
----------------------------------------------------------------------
diff --git a/ForgeWiki/forgewiki/scripts/wiki_from_trac/__init__.py b/ForgeWiki/forgewiki/scripts/wiki_from_trac/__init__.py
new file mode 100644
index 0000000..8d3f8b7
--- /dev/null
+++ b/ForgeWiki/forgewiki/scripts/wiki_from_trac/__init__.py
@@ -0,0 +1,18 @@
+#       Licensed to the Apache Software Foundation (ASF) under one
+#       or more contributor license agreements.  See the NOTICE file
+#       distributed with this work for additional information
+#       regarding copyright ownership.  The ASF licenses this file
+#       to you under the Apache License, Version 2.0 (the
+#       "License"); you may not use this file except in compliance
+#       with the License.  You may obtain a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#       Unless required by applicable law or agreed to in writing,
+#       software distributed under the License is distributed on an
+#       "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+#       KIND, either express or implied.  See the License for the
+#       specific language governing permissions and limitations
+#       under the License.
+
+from .wiki_from_trac import WikiFromTrac
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/fcf24fc2/ForgeWiki/forgewiki/scripts/wiki_from_trac/extractors.py
----------------------------------------------------------------------
diff --git a/ForgeWiki/forgewiki/scripts/wiki_from_trac/extractors.py b/ForgeWiki/forgewiki/scripts/wiki_from_trac/extractors.py
new file mode 100644
index 0000000..ef931b3
--- /dev/null
+++ b/ForgeWiki/forgewiki/scripts/wiki_from_trac/extractors.py
@@ -0,0 +1,226 @@
+#       Licensed to the Apache Software Foundation (ASF) under one
+#       or more contributor license agreements.  See the NOTICE file
+#       distributed with this work for additional information
+#       regarding copyright ownership.  The ASF licenses this file
+#       to you under the Apache License, Version 2.0 (the
+#       "License"); you may not use this file except in compliance
+#       with the License.  You may obtain a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#       Unless required by applicable law or agreed to in writing,
+#       software distributed under the License is distributed on an
+#       "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+#       KIND, either express or implied.  See the License for the
+#       specific language governing permissions and limitations
+#       under the License.
+
+import re
+import sys
+import json
+from urllib import quote, unquote
+from urlparse import urljoin, urlsplit
+
+try:
+    import requests
+except:
+    # Ignore this import if the requests package is not installed
+    pass
+
+try:
+    # Ignore this import if the html2text package is not installed
+    import html2text
+except:
+    pass
+
+from BeautifulSoup import BeautifulSoup
+
+
+class WikiExporter(object):
+
+    PAGE_LIST_URL = 'wiki/TitleIndex'
+    PAGE_URL = 'wiki/%s'
+    CONTENT_DIV_ATTRS = {'class': 'wikipage searchable'}
+    EXCLUDE_PAGES = [
+        'CamelCase',
+        'InterMapTxt',
+        'InterTrac',
+        'InterWiki',
+        'PageTemplates',
+        'SandBox',
+        'TitleIndex',
+        'TracAccessibility',
+        'TracAdmin',
+        'TracBackup',
+        'TracBrowser',
+        'TracChangeset',
+        'TracEnvironment',
+        'TracFineGrainedPermissions',
+        'TracGuide',
+        'TracImport',
+        'TracIni',
+        'TracInterfaceCustomization',
+        'TracLinks',
+        'TracLogging',
+        'TracNavigation',
+        'TracNotification',
+        'TracPermissions',
+        'TracPlugins',
+        'TracQuery',
+        'TracReports',
+        'TracRevisionLog',
+        'TracRoadmap',
+        'TracRss',
+        'TracSearch',
+        'TracSupport',
+        'TracSyntaxColoring',
+        'TracTickets',
+        'TracTicketsCustomFields',
+        'TracTimeline',
+        'TracUnicode',
+        'TracWiki',
+        'TracWorkflow',
+        'WikiDeletePage',
+        'WikiFormatting',
+        'WikiHtml',
+        'WikiMacros',
+        'WikiNewPage',
+        'WikiPageNames',
+        'WikiProcessors',
+        'WikiRestructuredText',
+        'WikiRestructuredTextLinks',
+        'RecentChanges',
+    ]
+    RENAME_PAGES = {
+        'WikiStart': 'Home',  # Change the start page name to Home
+        'Home': 'WikiStart',  # Rename the Home page to WikiStart
+    }
+
+    def __init__(self, base_url, options):
+        self.base_url = base_url
+        self.options = options
+
+    def export(self, out):
+        pages = [self.get_page(title) for title in self.page_list()]
+        out.write(json.dumps(pages, indent=2, sort_keys=True))
+        out.write('\n')
+
+    def log(self, msg):
+        if self.options.verbose:
+            print >>sys.stderr, msg
+
+    def url(self, suburl, type=None):
+        url = urljoin(self.base_url, suburl)
+        if type is None:
+            return url
+        glue = '&' if '?' in suburl else '?'
+        return  url + glue + 'format=' + type
+
+    def fetch(self, url, **kwargs):
+        return requests.get(url, **kwargs)
+
+    def page_list(self):
+        url = urljoin(self.base_url, self.PAGE_LIST_URL)
+        self.log('Fetching list of pages from %s' % url)
+        r = self.fetch(url)
+        html = BeautifulSoup(r.content)
+        pages = html.find('div', attrs=self.CONTENT_DIV_ATTRS) \
+                    .find('ul').findAll('li')
+        pages = [page.find('a').text
+                 for page in pages
+                 if page.find('a')
+                 and page.find('a').text not in self.EXCLUDE_PAGES]
+        # Remove duplicate entries by converting page list to a set.
+        # As we're going to fetch all listed pages,
+        # it's safe to destroy the original order of pages.
+        return set(pages)
+
+    def get_page(self, title):
+        title = quote(title)
+        convert_method = '_get_page_' + self.options.converter
+        content = getattr(self, convert_method)(title)
+        page = {
+            'title': self.convert_title(title),
+            'text': self.convert_content(content),
+            'labels': '',
+        }
+        return page
+
+    def _get_page_html2text(self, title):
+        url = self.url(self.PAGE_URL % title)
+        self.log('Fetching page %s' % url)
+        r = self.fetch(url)
+        html = BeautifulSoup(r.content)
+        return html.find('div', attrs=self.CONTENT_DIV_ATTRS)
+
+    def _get_page_regex(self, title):
+        url = self.url(self.PAGE_URL % title, 'txt')
+        self.log('Fetching page %s' % url)
+        r = self.fetch(url)
+        return r.content
+
+    def convert_title(self, title):
+        title = self.RENAME_PAGES.get(title, title)
+        title = title.replace('/', '-')  # Handle subpages
+        title = title.rstrip('?')  # Links to non-existent pages ends with '?'
+        return title
+
+    def convert_content(self, content):
+        convert_method = '_convert_content_' + self.options.converter
+        return getattr(self, convert_method)(content)
+
+    def _convert_wiki_toc_to_markdown(self, content):
+        """
+        Removes contents of div.wiki-toc elements and replaces them with
+        the '[TOC]' markdown macro.
+        """
+        for toc in content('div', attrs={'class': 'wiki-toc'}):
+            toc.string = '[TOC]'
+        return content
+
+    def _convert_content_html2text(self, content):
+        html2text.BODY_WIDTH = 0  # Don't wrap lines
+        content = self._convert_wiki_toc_to_markdown(content)
+        content = html2text.html2text(unicode(content))
+        # Convert internal links
+        internal_url = urlsplit(self.base_url).path + 'wiki/'
+        internal_link_re = r'\[([^]]+)\]\(%s([^)]*)\)' % internal_url
+        internal_link = re.compile(internal_link_re, re.UNICODE)
+        def sub(match):
+            caption = match.group(1)
+            page = self.convert_title(match.group(2))
+            if caption == page:
+                link = '[%s]' % unquote(page)
+            else:
+                link = '[%s](%s)' % (caption, page)
+            return link
+        return internal_link.sub(sub, content)
+
+    def _convert_content_regex(self, text):
+        # https://gist.github.com/sgk/1286682
+        text = re.sub('\r\n', '\n', text)
+        text = re.sub(r'{{{(.*?)}}}', r'`\1`', text)
+
+        def indent4(m):
+            return '\n    ' + m.group(1).replace('\n', '\n    ')
+
+        text = re.sub(r'(?sm){{{\n(.*?)\n}}}', indent4, text)
+        text = re.sub(r'(?m)^====\s+(.*?)\s+====$', r'#### \1', text)
+        text = re.sub(r'(?m)^===\s+(.*?)\s+===$', r'### \1', text)
+        text = re.sub(r'(?m)^==\s+(.*?)\s+==$', r'## \1', text)
+        text = re.sub(r'(?m)^=\s+(.*?)\s+=$', r'# \1', text)
+        text = re.sub(r'^       * ', r'****', text)
+        text = re.sub(r'^     * ', r'***', text)
+        text = re.sub(r'^   * ', r'**', text)
+        text = re.sub(r'^ * ', r'*', text)
+        text = re.sub(r'^ \d+. ', r'1.', text)
+        a = []
+        for line in text.split('\n'):
+            if not line.startswith('    '):
+                line = re.sub(r'\[(https?://[^\s\[\]]+)\s([^\[\]]+)\]', r'[\2](\1)', line)
+                line = re.sub(r'\[(wiki:[^\s\[\]]+)\s([^\[\]]+)\]', r'[\2](/\1/)', line)
+                line = re.sub(r'\!(([A-Z][a-z0-9]+){2,})', r'\1', line)
+                line = re.sub(r'\'\'\'(.*?)\'\'\'', r'*\1*', line)
+                line = re.sub(r'\'\'(.*?)\'\'', r'_\1_', line)
+            a.append(line)
+        return '\n'.join(a)

http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/fcf24fc2/ForgeWiki/forgewiki/scripts/wiki_from_trac/loaders.py
----------------------------------------------------------------------
diff --git a/ForgeWiki/forgewiki/scripts/wiki_from_trac/loaders.py b/ForgeWiki/forgewiki/scripts/wiki_from_trac/loaders.py
new file mode 100644
index 0000000..55e4480
--- /dev/null
+++ b/ForgeWiki/forgewiki/scripts/wiki_from_trac/loaders.py
@@ -0,0 +1,72 @@
+#       Licensed to the Apache Software Foundation (ASF) under one
+#       or more contributor license agreements.  See the NOTICE file
+#       distributed with this work for additional information
+#       regarding copyright ownership.  The ASF licenses this file
+#       to you under the Apache License, Version 2.0 (the
+#       "License"); you may not use this file except in compliance
+#       with the License.  You may obtain a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#       Unless required by applicable law or agreed to in writing,
+#       software distributed under the License is distributed on an
+#       "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+#       KIND, either express or implied.  See the License for the
+#       specific language governing permissions and limitations
+#       under the License.
+
+import json
+from optparse import OptionParser
+
+from allura.lib.import_api import AlluraImportApiClient
+
+
+def load_data(doc_file_name=None, optparser=None, options=None):
+    import_options = {}
+    for s in options.import_opts:
+        k, v = s.split('=', 1)
+        if v == 'false':
+            v = False
+        import_options[k] = v
+
+    user_map = {}
+    if options.user_map_file:
+        f = open(options.user_map_file)
+        try:
+            user_map = json.load(f)
+            if type(user_map) is not type({}):
+                raise ValueError
+            for k, v in user_map.iteritems():
+                print k, v
+                if not isinstance(k, basestring) or not isinstance(v, basestring):
+                    raise ValueError
+        except ValueError:
+            optparser.error('--user-map should specify JSON file with format {"original_user": "sf_user", ...}')
+        finally:
+            f.close()
+
+    import_options['user_map'] = user_map
+
+    cli = AlluraImportApiClient(options.base_url, options.api_key, options.secret_key, options.verbose)
+    doc_txt = open(doc_file_name).read()
+
+    if options.wiki:
+        import_wiki(cli, options.project, options.wiki, options, doc_txt)
+
+
+def import_wiki(cli, project, tool, options, doc_txt):
+    url = '/rest/p/' + project + '/' + tool
+    doc = json.loads(doc_txt)
+    if 'wiki' in doc and 'default' in doc['wiki'] and 'artifacts' in doc['wiki']['default']:
+        pages = doc['trackers']['default']['artifacts']
+    else:
+        pages = doc
+    if options.verbose:
+        print "Processing %d pages" % len(pages)
+    for page in pages:
+        title = page.pop('title').encode('utf-8')
+        page['text'] = page['text'].encode('utf-8')
+        page['labels'] = page['labels'].encode('utf-8')
+        r = cli.call(url + '/' + title, **page)
+        assert r == {}
+        print 'Imported wiki page %s' % title

http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/fcf24fc2/ForgeWiki/forgewiki/scripts/wiki_from_trac/wiki_from_trac.py
----------------------------------------------------------------------
diff --git a/ForgeWiki/forgewiki/scripts/wiki_from_trac/wiki_from_trac.py b/ForgeWiki/forgewiki/scripts/wiki_from_trac/wiki_from_trac.py
new file mode 100644
index 0000000..b08df4f
--- /dev/null
+++ b/ForgeWiki/forgewiki/scripts/wiki_from_trac/wiki_from_trac.py
@@ -0,0 +1,69 @@
+#       Licensed to the Apache Software Foundation (ASF) under one
+#       or more contributor license agreements.  See the NOTICE file
+#       distributed with this work for additional information
+#       regarding copyright ownership.  The ASF licenses this file
+#       to you under the Apache License, Version 2.0 (the
+#       "License"); you may not use this file except in compliance
+#       with the License.  You may obtain a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#       Unless required by applicable law or agreed to in writing,
+#       software distributed under the License is distributed on an
+#       "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+#       KIND, either express or implied.  See the License for the
+#       specific language governing permissions and limitations
+#       under the License.
+
+import argparse
+import logging
+from tempfile import NamedTemporaryFile
+from tg.decorators import cached_property
+
+from extractors import WikiExporter
+from loaders import load_data
+
+from allura.scripts import ScriptTask
+
+
+log = logging.getLogger(__name__)
+
+
+class WikiFromTrac(ScriptTask):
+    """Import Trac Wiki to Allura Wiki"""
+    @classmethod
+    def parser(cls):
+        parser = argparse.ArgumentParser(description='Import wiki from'
+            'Trac to allura wiki')
+
+        parser.add_argument('trac_url', type=str, help='Trac URL')
+        parser.add_argument('-a', '--api-ticket', dest='api_key', help='API ticket')
+        parser.add_argument('-s', '--secret-key', dest='secret_key', help='Secret key')
+        parser.add_argument('-p', '--project', dest='project', help='Project to import to')
+        parser.add_argument('-t', '--tracker', dest='tracker', help='Tracker to import to')
+        parser.add_argument('-f', '--forum', dest='forum', help='Forum tool to import to')
+        parser.add_argument('-w', '--wiki', dest='wiki', help='Wiki tool to import to')
+        parser.add_argument('-u', '--base-url', dest='base_url', default='https://sourceforge.net', help='Base Allura (%(default)s for default)')
+        parser.add_argument('-o', dest='import_opts', default=[], action='append', help='Specify import option(s)', metavar='opt=val')
+        parser.add_argument('--user-map', dest='user_map_file', help='Map original users to SF.net users', metavar='JSON_FILE')
+        parser.add_argument('--validate', dest='validate', action='store_true', help='Validate import data')
+        parser.add_argument('-v', '--verbose', dest='verbose', action='store_true', help='Verbose operation')
+        parser.add_argument('-c', '--continue', dest='cont', action='store_true', help='Continue import into existing tracker')
+        parser.add_argument('-C', '--converter', dest='converter',
+                            default='html2text',
+                            help='Converter to use on wiki text. '
+                                 'Available options: '
+                                 'html2text (default) or regex')
+
+        return parser
+
+    @classmethod
+    def execute(cls, options):
+        with NamedTemporaryFile() as f:
+            WikiExporter(options.trac_url, options).export(f)
+            f.flush()
+            load_data(f.name, cls.parser(), options)
+
+
+if __name__ == '__main__':
+    WikiFromTrac.main()

http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/fcf24fc2/requirements-optional.txt
----------------------------------------------------------------------
diff --git a/requirements-optional.txt b/requirements-optional.txt
index accdc43..da1f205 100644
--- a/requirements-optional.txt
+++ b/requirements-optional.txt
@@ -15,3 +15,6 @@ MySQL-python  # GPL
 # One or the other is required to enable spam checking
 akismet==0.2.0
 PyMollom==0.1  # GPL
+
+# For wiki-export script
+requests

http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/fcf24fc2/scripts/allura_import.py
----------------------------------------------------------------------
diff --git a/scripts/allura_import.py b/scripts/allura_import.py
index ddcd588..b05d524 100644
--- a/scripts/allura_import.py
+++ b/scripts/allura_import.py
@@ -17,10 +17,10 @@
 
 import json
 from optparse import OptionParser
-from datetime import datetime
 
 from allura.lib.import_api import AlluraImportApiClient
 from forgetracker.scripts.import_tracker import import_tracker
+from forgewiki.scripts.wiki_from_trac.loaders import import_wiki
 
 
 def main():
@@ -61,6 +61,8 @@ def main():
                        verbose=options.verbose)
     elif options.forum:
         import_forum(cli, options.project, options.forum, user_map, doc_txt, validate=options.validate)
+    elif options.wiki:
+        import_wiki(cli, options.project, options.wiki, options, doc_txt)
 
 
 def import_forum(cli, project, tool, user_map, doc_txt, validate=True):
@@ -82,6 +84,7 @@ Import project data dump in JSON format into an Allura project.''')
     optparser.add_option('-p', '--project', dest='project', help='Project to import to')
     optparser.add_option('-t', '--tracker', dest='tracker', help='Tracker to import to')
     optparser.add_option('-f', '--forum', dest='forum', help='Forum tool to import to')
+    optparser.add_option('-w', '--wiki', dest='wiki', help='Wiki tool to import to')
     optparser.add_option('-u', '--base-url', dest='base_url', default='https://sourceforge.net', help='Base Allura URL (%default)')
     optparser.add_option('-o', dest='import_opts', default=[], action='append', help='Specify import option(s)', metavar='opt=val')
     optparser.add_option('--user-map', dest='user_map_file', help='Map original users to SF.net users', metavar='JSON_FILE')
@@ -100,4 +103,3 @@ Import project data dump in JSON format into an Allura project.''')
 
 if __name__ == '__main__':
     main()
-

http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/fcf24fc2/scripts/wiki-export.py
----------------------------------------------------------------------
diff --git a/scripts/wiki-export.py b/scripts/wiki-export.py
new file mode 100755
index 0000000..55baa04
--- /dev/null
+++ b/scripts/wiki-export.py
@@ -0,0 +1,58 @@
+#!/usr/bin/env python
+
+#       Licensed to the Apache Software Foundation (ASF) under one
+#       or more contributor license agreements.  See the NOTICE file
+#       distributed with this work for additional information
+#       regarding copyright ownership.  The ASF licenses this file
+#       to you under the Apache License, Version 2.0 (the
+#       "License"); you may not use this file except in compliance
+#       with the License.  You may obtain a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#       Unless required by applicable law or agreed to in writing,
+#       software distributed under the License is distributed on an
+#       "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+#       KIND, either express or implied.  See the License for the
+#       specific language governing permissions and limitations
+#       under the License.
+
+
+import json
+import sys
+from optparse import OptionParser
+
+from forgewiki.scripts.wiki_from_trac.extractors import WikiExporter
+
+
+def parse_options():
+    parser = OptionParser(
+        usage='%prog <Trac URL>\n\nExport wiki pages from a trac instance')
+
+    parser.add_option('-o', '--out-file', dest='out_filename',
+                      help='Write to file (default stdout)')
+    parser.add_option('-v', '--verbose', dest='verbose', action='store_true',
+                      help='Verbose operation')
+    parser.add_option('-c', '--converter', dest='converter',
+                      default='html2text',
+                      help='Converter to use on wiki text. '
+                           'Available options: html2text (default) or regex')
+    options, args = parser.parse_args()
+    if len(args) != 1:
+        parser.error('Wrong number of arguments.')
+    converters = ['html2text', 'regex']
+    if options.converter not in converters:
+        parser.error('Wrong converter. Available options: ' +
+                     ', '.join(converters))
+    return options, args
+
+
+if __name__ == '__main__':
+    options, args = parse_options()
+    exporter = WikiExporter(args[0], options)
+
+    out = sys.stdout
+    if options.out_filename:
+        out = open(options.out_filename, 'w')
+
+    exporter.export(out)
\ No newline at end of file


Mime
View raw message