incubator-allura-commits mailing list archives

From wwitz...@apache.org
Subject git commit: [#6769] Made GC Ticket Importer more resilient to upstream changes during import
Date Fri, 18 Oct 2013 11:52:32 GMT
Updated Branches:
  refs/heads/master 80247fc25 -> e1967ec47


[#6769] Made GC Ticket Importer more resilient to upstream changes during import

Signed-off-by: Cory Johns <cjohns@slashdotmedia.com>


Project: http://git-wip-us.apache.org/repos/asf/incubator-allura/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-allura/commit/e1967ec4
Tree: http://git-wip-us.apache.org/repos/asf/incubator-allura/tree/e1967ec4
Diff: http://git-wip-us.apache.org/repos/asf/incubator-allura/diff/e1967ec4

Branch: refs/heads/master
Commit: e1967ec4776db70e68e8599ba86adef1690524ab
Parents: 80247fc
Author: Cory Johns <cjohns@slashdotmedia.com>
Authored: Thu Oct 17 23:20:35 2013 +0000
Committer: Cory Johns <cjohns@slashdotmedia.com>
Committed: Thu Oct 17 23:20:35 2013 +0000

----------------------------------------------------------------------
 .../forgeimporters/google/__init__.py           | 30 ++++++++-----
 .../tests/google/test_extractor.py              | 44 ++++++++++++++++++++
 2 files changed, 64 insertions(+), 10 deletions(-)
----------------------------------------------------------------------
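
For readers skimming the diff below: the extractor now collects issue IDs up front in pages of 100 (sorted by ID so paging stays stable), imports them, and then re-reads the listing with a small overlap so issues filed while the import was running get picked up on a later pass. A minimal standalone sketch of that pattern (not the Allura code itself; fetch_ids_page() and import_one() are hypothetical stand-ins for the CSV page fetch and the per-ticket import):

# Sketch only: fetch_ids_page(start) and import_one(issue_id) are
# hypothetical stand-ins, not part of the Allura code in the diff below.
PAGE_SIZE = 100   # matches the importer's CSV paging limit
OVERLAP = 10      # jump back a few entries in case some issues were deleted

def fetch_all_ids(fetch_ids_page, start=0):
    """Walk the paged ID listing and accumulate every issue ID."""
    ids = set()
    page = fetch_ids_page(start)
    while page:
        ids.update(page)
        start += PAGE_SIZE
        page = fetch_ids_page(start)
    return ids

def import_with_recheck(fetch_ids_page, import_one):
    """Import every issue, then re-check for issues created meanwhile."""
    seen = set()
    batch = fetch_all_ids(fetch_ids_page)
    while batch:
        for issue_id in sorted(batch):
            import_one(issue_id)
        seen |= batch
        # Re-read the tail of the listing (with a small overlap) and keep
        # only IDs not yet imported; stop once nothing new shows up.
        recheck = fetch_all_ids(fetch_ids_page, start=max(len(seen) - OVERLAP, 0))
        batch = recheck - seen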


http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/e1967ec4/ForgeImporters/forgeimporters/google/__init__.py
----------------------------------------------------------------------
diff --git a/ForgeImporters/forgeimporters/google/__init__.py b/ForgeImporters/forgeimporters/google/__init__.py
index 3313ee8..8dcd876 100644
--- a/ForgeImporters/forgeimporters/google/__init__.py
+++ b/ForgeImporters/forgeimporters/google/__init__.py
@@ -89,7 +89,7 @@ class GoogleCodeProjectExtractor(ProjectExtractor):
     PAGE_MAP = {
             'project_info': BASE_URL + '/p/{project_name}/',
             'source_browse': BASE_URL + '/p/{project_name}/source/browse/',
-            'issues_csv': BASE_URL + '/p/{project_name}/issues/csv?can=1&colspec=ID&start={start}',
+            'issues_csv': BASE_URL + '/p/{project_name}/issues/csv?can=1&colspec=ID&sort=ID&start={start}',
             'issue': BASE_URL + '/p/{project_name}/issues/detail?id={issue_id}',
         }
 
@@ -148,14 +148,9 @@ class GoogleCodeProjectExtractor(ProjectExtractor):
         Iterate over all issues for a project,
         using paging to keep the responses reasonable.
         """
-        start = 0
-        limit = 100
-
-        extractor = cls(project_name, 'issues_csv', parser=csv_parser, start=start)
-        while extractor.page:
-            if len(extractor.page) <= 0:
-                return
-            for issue_id in extractor.page:
+        issue_ids = cls.get_issue_ids(project_name, start=0)
+        while issue_ids:
+            for issue_id in sorted(issue_ids):
                 try:
                     yield (int(issue_id), cls(project_name, 'issue', issue_id=issue_id))
                 except HTTPError as e:
@@ -164,8 +159,23 @@ class GoogleCodeProjectExtractor(ProjectExtractor):
                         continue
                     else:
                         raise
+            # get any new issues that were created while importing
+            # (jumping back a few in case some were deleted and new ones added)
+            new_ids = cls.get_issue_ids(project_name, start=len(issue_ids)-10)
+            issue_ids = new_ids - issue_ids
+
+    def get_issue_ids(self, project_name, start=0):
+        limit = 100
+
+        issue_ids = set()
+        page = self.get_page('issues_csv', parser=csv_parser, start=start)
+        while page:
+            if len(page) <= 0:
+                return
+            issue_ids.update(page)
             start += limit
-            extractor.get_page('issues_csv', parser=csv_parser, start=start)
+            page = self.get_page('issues_csv', parser=csv_parser, start=start)
+        return issue_ids
 
     def get_issue_summary(self):
         text = self.page.find(id='issueheader').findAll('td', limit=2)[1].span.text.strip()
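
For context, a caller consumes iter_issues() as a generator of (issue_id, extractor) pairs, where each extractor has already fetched that issue's detail page (note the HTTPError handling around construction above, which skips 404s and re-raises anything else). An illustrative driver loop, assuming a hypothetical import_ticket() callback that does the actual ticket creation:

# Illustrative only: import_ticket() is a hypothetical callback; the real
# importer wiring is outside the scope of this commit.
from forgeimporters.google import GoogleCodeProjectExtractor

def import_all_tickets(project_name, import_ticket):
    for issue_id, extractor in GoogleCodeProjectExtractor.iter_issues(project_name):
        # Issues that 404 upstream have already been skipped by iter_issues();
        # everything yielded here is ready to be turned into a ticket.
        import_ticket(issue_id, extractor)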

http://git-wip-us.apache.org/repos/asf/incubator-allura/blob/e1967ec4/ForgeImporters/forgeimporters/tests/google/test_extractor.py
----------------------------------------------------------------------
diff --git a/ForgeImporters/forgeimporters/tests/google/test_extractor.py b/ForgeImporters/forgeimporters/tests/google/test_extractor.py
index 2fe82fa..296f12d 100644
--- a/ForgeImporters/forgeimporters/tests/google/test_extractor.py
+++ b/ForgeImporters/forgeimporters/tests/google/test_extractor.py
@@ -17,6 +17,7 @@
 
 from unittest import TestCase
 import pkg_resources
+from urllib2 import HTTPError
 
 import mock
 from datadiff.tools import assert_equal
@@ -288,6 +289,49 @@ class TestGoogleCodeProjectExtractor(TestCase):
             self.assertEqual(actual.updates, expected['updates'])
             self.assertEqual([a.filename for a in actual.attachments], expected['attachments'])
 
+    def test_get_issue_ids(self):
+        extractor = google.GoogleCodeProjectExtractor(None)
+        extractor.get_page = mock.Mock(side_effect=((1, 2, 3),(2, 3, 4), ()))
+        self.assertItemsEqual(extractor.get_issue_ids('foo', start=10), (1, 2, 3, 4))
+        self.assertEqual(extractor.get_page.call_count, 3)
+        extractor.get_page.assert_has_calls([
+                mock.call('issues_csv', parser=google.csv_parser, start=10),
+                mock.call('issues_csv', parser=google.csv_parser, start=110),
+                mock.call('issues_csv', parser=google.csv_parser, start=210),
+            ])
+
+    @mock.patch.object(google.GoogleCodeProjectExtractor, 'get_page')
+    @mock.patch.object(google.GoogleCodeProjectExtractor, 'get_issue_ids')
+    def test_iter_issue_ids(self, get_issue_ids, get_page):
+        get_issue_ids.side_effect = [set([1, 2]), set([2, 3, 4])]
+        issue_ids = [i for i,e in list(google.GoogleCodeProjectExtractor.iter_issues('foo'))]
+        self.assertEqual(issue_ids, [1, 2, 3, 4])
+        get_issue_ids.assert_has_calls([
+                mock.call('foo', start=0),
+                mock.call('foo', start=-8),
+            ])
+
+    @mock.patch.object(google.GoogleCodeProjectExtractor, '__init__')
+    @mock.patch.object(google.GoogleCodeProjectExtractor, 'get_issue_ids')
+    def test_iter_issue_ids_raises(self, get_issue_ids, __init__):
+        get_issue_ids.side_effect = [set([1, 2, 3, 4, 5])]
+        __init__.side_effect = [
+                None,
+                HTTPError('fourohfour', 404, 'fourohfour', {}, mock.Mock()),  # should skip but keep going
+                None,
+                HTTPError('fubar', 500, 'fubar', {}, mock.Mock()),  # should be re-raised
+                None,
+            ]
+        issue_ids = []
+        try:
+            for issue_id, extractor in google.GoogleCodeProjectExtractor.iter_issues('foo'):
+                issue_ids.append(issue_id)
+        except HTTPError as e:
+            self.assertEqual(e.code, 500)
+        else:
+            assert False, 'Missing expected raised exception'
+        self.assertEqual(issue_ids, [1, 3])
+
 class TestUserLink(TestCase):
     def test_plain(self):
         tag = mock.Mock()

